Update Scintilla to version 3.10.4
[geany-mirror.git] / scintilla / src / Document.cxx
blobdd11ae42dc1db1d727989c2f9c8a788e94e2dcc1
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <cstddef>
9 #include <cstdlib>
10 #include <cassert>
11 #include <cstring>
12 #include <cstdio>
13 #include <cmath>
15 #include <stdexcept>
16 #include <string>
17 #include <vector>
18 #include <forward_list>
19 #include <algorithm>
20 #include <memory>
21 #include <chrono>
23 #ifndef NO_CXX11_REGEX
24 #include <regex>
25 #endif
27 #include "Platform.h"
29 #include "ILoader.h"
30 #include "ILexer.h"
31 #include "Scintilla.h"
33 #include "CharacterSet.h"
34 #include "CharacterCategory.h"
35 #include "Position.h"
36 #include "SplitVector.h"
37 #include "Partitioning.h"
38 #include "RunStyles.h"
39 #include "CellBuffer.h"
40 #include "PerLine.h"
41 #include "CharClassify.h"
42 #include "Decoration.h"
43 #include "CaseFolder.h"
44 #include "Document.h"
45 #include "RESearch.h"
46 #include "UniConversion.h"
47 #include "ElapsedPeriod.h"
49 using namespace Scintilla;
51 void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
52 if (pdoc && instance && !performingStyle) {
53 // Protect against reentrance, which may occur, for example, when
54 // fold points are discovered while performing styling and the folding
55 // code looks for child lines which may trigger styling.
56 performingStyle = true;
58 const Sci::Position lengthDoc = pdoc->Length();
59 if (end == -1)
60 end = lengthDoc;
61 const Sci::Position len = end - start;
63 PLATFORM_ASSERT(len >= 0);
64 PLATFORM_ASSERT(start + len <= lengthDoc);
66 int styleStart = 0;
67 if (start > 0)
68 styleStart = pdoc->StyleAt(start - 1);
70 if (len > 0) {
71 instance->Lex(start, len, styleStart, pdoc);
72 instance->Fold(start, len, styleStart, pdoc);
75 performingStyle = false;
79 int LexInterface::LineEndTypesSupported() {
80 if (instance) {
81 const int interfaceVersion = instance->Version();
82 if (interfaceVersion >= lvSubStyles) {
83 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
84 return ssinstance->LineEndTypesSupported();
87 return 0;
90 ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
91 duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
94 void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
95 // Only adjust for multiple actions to avoid instability
96 if (numberActions < 8)
97 return;
99 // Alpha value for exponential smoothing.
100 // Most recent value contributes 25% to smoothed value.
101 const double alpha = 0.25;
103 const double durationOne = durationOfActions / numberActions;
104 duration = Sci::clamp(alpha * durationOne + (1.0 - alpha) * duration,
105 minDuration, maxDuration);
108 double ActionDuration::Duration() const noexcept {
109 return duration;
112 Document::Document(int options) :
113 cb((options & SC_DOCUMENTOPTION_STYLES_NONE) == 0, (options & SC_DOCUMENTOPTION_TEXT_LARGE) != 0),
114 durationStyleOneLine(0.00001, 0.000001, 0.0001) {
115 refCount = 0;
116 #ifdef _WIN32
117 eolMode = SC_EOL_CRLF;
118 #else
119 eolMode = SC_EOL_LF;
120 #endif
121 dbcsCodePage = SC_CP_UTF8;
122 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
123 endStyled = 0;
124 styleClock = 0;
125 enteredModification = 0;
126 enteredStyling = 0;
127 enteredReadOnlyCount = 0;
128 insertionSet = false;
129 tabInChars = 8;
130 indentInChars = 0;
131 actualIndentInChars = 8;
132 useTabs = true;
133 tabIndents = true;
134 backspaceUnindents = false;
136 matchesValid = false;
138 perLineData[ldMarkers].reset(new LineMarkers());
139 perLineData[ldLevels].reset(new LineLevels());
140 perLineData[ldState].reset(new LineState());
141 perLineData[ldMargin].reset(new LineAnnotation());
142 perLineData[ldAnnotation].reset(new LineAnnotation());
144 decorations = DecorationListCreate(IsLarge());
146 cb.SetPerLine(this);
147 cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
150 Document::~Document() {
151 for (const WatcherWithUserData &watcher : watchers) {
152 watcher.watcher->NotifyDeleted(this, watcher.userData);
156 // Increase reference count and return its previous value.
157 int Document::AddRef() {
158 return refCount++;
161 // Decrease reference count and return its previous value.
162 // Delete the document if reference count reaches zero.
163 int SCI_METHOD Document::Release() {
164 const int curRefCount = --refCount;
165 if (curRefCount == 0)
166 delete this;
167 return curRefCount;
170 void Document::Init() {
171 for (const std::unique_ptr<PerLine> &pl : perLineData) {
172 if (pl)
173 pl->Init();
177 void Document::InsertLine(Sci::Line line) {
178 for (const std::unique_ptr<PerLine> &pl : perLineData) {
179 if (pl)
180 pl->InsertLine(line);
184 void Document::RemoveLine(Sci::Line line) {
185 for (const std::unique_ptr<PerLine> &pl : perLineData) {
186 if (pl)
187 pl->RemoveLine(line);
191 LineMarkers *Document::Markers() const {
192 return static_cast<LineMarkers *>(perLineData[ldMarkers].get());
195 LineLevels *Document::Levels() const {
196 return static_cast<LineLevels *>(perLineData[ldLevels].get());
199 LineState *Document::States() const {
200 return static_cast<LineState *>(perLineData[ldState].get());
203 LineAnnotation *Document::Margins() const {
204 return static_cast<LineAnnotation *>(perLineData[ldMargin].get());
207 LineAnnotation *Document::Annotations() const {
208 return static_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
211 int Document::LineEndTypesSupported() const {
212 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
213 return pli->LineEndTypesSupported();
214 else
215 return 0;
218 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
219 if (dbcsCodePage != dbcsCodePage_) {
220 dbcsCodePage = dbcsCodePage_;
221 SetCaseFolder(nullptr);
222 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
223 cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
224 ModifiedAt(0); // Need to restyle whole document
225 return true;
226 } else {
227 return false;
231 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
232 if (lineEndBitSet != lineEndBitSet_) {
233 lineEndBitSet = lineEndBitSet_;
234 const int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
235 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
236 ModifiedAt(0);
237 cb.SetLineEndTypes(lineEndBitSetActive);
238 return true;
239 } else {
240 return false;
242 } else {
243 return false;
247 void Document::SetSavePoint() {
248 cb.SetSavePoint();
249 NotifySavePoint(true);
// Undo every step reported by cb.TentativeSteps(), sending a "before"
// notification ahead of each step and an "after" notification once it has
// been performed, then call cb.TentativeCommit().
// Returns immediately when TentativeActive() is false. The undo work is
// skipped when a modification is already in progress
// (enteredModification != 0) or when the cell buffer is read-only.
// NOTE(review): lines that held only closing braces are absent from this
// listing, so the block nesting is inferred - confirm against upstream.
252 void Document::TentativeUndo() {
253 if (!TentativeActive())
254 return;
255 CheckReadOnly();
256 if (enteredModification == 0) {
257 enteredModification++;
258 if (!cb.IsReadOnly()) {
// Remember the save-point state so a change can be reported afterwards.
259 const bool startSavePoint = cb.IsSavePoint();
260 bool multiLine = false;
261 const int steps = cb.TentativeSteps();
262 //Platform::DebugPrintf("Steps=%d\n", steps);
263 for (int step = 0; step < steps; step++) {
264 const Sci::Line prevLinesTotal = LinesTotal();
265 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal is announced as BEFOREINSERT,
// a container action forwards its position as a token, and any other
// action is announced as BEFOREDELETE.
266 if (action.at == removeAction) {
267 NotifyModified(DocModification(
268 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
269 } else if (action.at == containerAction) {
270 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
271 dm.token = action.position;
272 NotifyModified(dm);
273 } else {
274 NotifyModified(DocModification(
275 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
277 cb.PerformUndoStep();
278 if (action.at != containerAction) {
279 ModifiedAt(action.position);
// Build the flags for the "after" notification.
282 int modFlags = SC_PERFORMED_UNDO;
283 // With undo, an insertion action becomes a deletion notification
284 if (action.at == removeAction) {
285 modFlags |= SC_MOD_INSERTTEXT;
286 } else if (action.at == insertAction) {
287 modFlags |= SC_MOD_DELETETEXT;
289 if (steps > 1)
290 modFlags |= SC_MULTISTEPUNDOREDO;
// A change in the total line count marks the whole undo as multi-line.
291 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
292 if (linesAdded != 0)
293 multiLine = true;
// Only the final step carries LASTSTEPINUNDOREDO (and MULTILINEUNDOREDO
// when any step changed the line count).
294 if (step == steps - 1) {
295 modFlags |= SC_LASTSTEPINUNDOREDO;
296 if (multiLine)
297 modFlags |= SC_MULTILINEUNDOREDO;
299 NotifyModified(DocModification(modFlags, action.position, action.lenData,
300 linesAdded, action.data.get()));
// Report a save-point transition if undoing crossed it.
303 const bool endSavePoint = cb.IsSavePoint();
304 if (startSavePoint != endSavePoint)
305 NotifySavePoint(endSavePoint);
307 cb.TentativeCommit();
309 enteredModification--;
313 int Document::GetMark(Sci::Line line) const {
314 return Markers()->MarkValue(line);
317 Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const {
318 return Markers()->MarkerNext(lineStart, mask);
321 int Document::AddMark(Sci::Line line, int markerNum) {
322 if (line >= 0 && line <= LinesTotal()) {
323 const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
324 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
325 NotifyModified(mh);
326 return prev;
327 } else {
328 return -1;
332 void Document::AddMarkSet(Sci::Line line, int valueSet) {
333 if (line < 0 || line > LinesTotal()) {
334 return;
336 unsigned int m = valueSet;
337 for (int i = 0; m; i++, m >>= 1) {
338 if (m & 1)
339 Markers()->AddMark(line, i, LinesTotal());
341 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
342 NotifyModified(mh);
345 void Document::DeleteMark(Sci::Line line, int markerNum) {
346 Markers()->DeleteMark(line, markerNum, false);
347 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
348 NotifyModified(mh);
351 void Document::DeleteMarkFromHandle(int markerHandle) {
352 Markers()->DeleteMarkFromHandle(markerHandle);
353 DocModification mh(SC_MOD_CHANGEMARKER);
354 mh.line = -1;
355 NotifyModified(mh);
358 void Document::DeleteAllMarks(int markerNum) {
359 bool someChanges = false;
360 for (Sci::Line line = 0; line < LinesTotal(); line++) {
361 if (Markers()->DeleteMark(line, markerNum, true))
362 someChanges = true;
364 if (someChanges) {
365 DocModification mh(SC_MOD_CHANGEMARKER);
366 mh.line = -1;
367 NotifyModified(mh);
371 Sci::Line Document::LineFromHandle(int markerHandle) const {
372 return Markers()->LineFromHandle(markerHandle);
375 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
376 return cb.LineStart(static_cast<Sci::Line>(line));
379 bool Document::IsLineStartPosition(Sci::Position position) const {
380 return LineStart(LineFromPosition(position)) == position;
383 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
384 if (line >= LinesTotal() - 1) {
385 return LineStart(line + 1);
386 } else {
387 Sci::Position position = LineStart(line + 1);
388 if (SC_CP_UTF8 == dbcsCodePage) {
389 const unsigned char bytes[] = {
390 cb.UCharAt(position-3),
391 cb.UCharAt(position-2),
392 cb.UCharAt(position-1),
394 if (UTF8IsSeparator(bytes)) {
395 return position - UTF8SeparatorLength;
397 if (UTF8IsNEL(bytes+1)) {
398 return position - UTF8NELLength;
401 position--; // Back over CR or LF
402 // When line terminator is CR+LF, may need to go back one more
403 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
404 position--;
406 return position;
410 void SCI_METHOD Document::SetErrorStatus(int status) {
411 // Tell the watchers an error has occurred.
412 for (const WatcherWithUserData &watcher : watchers) {
413 watcher.watcher->NotifyErrorOccurred(this, watcher.userData, status);
417 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
418 return cb.LineFromPosition(pos);
421 Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
422 // Avoids casting in callers for this very common function
423 return cb.LineFromPosition(pos);
426 Sci::Position Document::LineEndPosition(Sci::Position position) const {
427 return LineEnd(LineFromPosition(position));
430 bool Document::IsLineEndPosition(Sci::Position position) const {
431 return LineEnd(LineFromPosition(position)) == position;
434 bool Document::IsPositionInLineEnd(Sci::Position position) const {
435 return position >= LineEnd(LineFromPosition(position));
438 Sci::Position Document::VCHomePosition(Sci::Position position) const {
439 const Sci::Line line = SciLineFromPosition(position);
440 const Sci::Position startPosition = LineStart(line);
441 const Sci::Position endLine = LineEnd(line);
442 Sci::Position startText = startPosition;
443 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
444 startText++;
445 if (position == startText)
446 return startPosition;
447 else
448 return startText;
451 Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const {
452 return cb.IndexLineStart(line, lineCharacterIndex);
455 Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const {
456 return cb.LineFromPositionIndex(pos, lineCharacterIndex);
459 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
460 const int prev = Levels()->SetLevel(static_cast<Sci::Line>(line), level, LinesTotal());
461 if (prev != level) {
462 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
463 LineStart(line), 0, 0, nullptr, static_cast<Sci::Line>(line));
464 mh.foldLevelNow = level;
465 mh.foldLevelPrev = prev;
466 NotifyModified(mh);
468 return prev;
471 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
472 return Levels()->GetLevel(static_cast<Sci::Line>(line));
475 void Document::ClearLevels() {
476 Levels()->ClearLevels();
479 static bool IsSubordinate(int levelStart, int levelTry) noexcept {
480 if (levelTry & SC_FOLDLEVELWHITEFLAG)
481 return true;
482 else
483 return LevelNumber(levelStart) < LevelNumber(levelTry);
486 Sci::Line Document::GetLastChild(Sci::Line lineParent, int level, Sci::Line lastLine) {
487 if (level == -1)
488 level = LevelNumber(GetLevel(lineParent));
489 const Sci::Line maxLine = LinesTotal();
490 const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
491 Sci::Line lineMaxSubord = lineParent;
492 while (lineMaxSubord < maxLine - 1) {
493 EnsureStyledTo(LineStart(lineMaxSubord + 2));
494 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
495 break;
496 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
497 break;
498 lineMaxSubord++;
500 if (lineMaxSubord > lineParent) {
501 if (level > LevelNumber(GetLevel(lineMaxSubord + 1))) {
502 // Have chewed up some whitespace that belongs to a parent so seek back
503 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
504 lineMaxSubord--;
508 return lineMaxSubord;
511 Sci::Line Document::GetFoldParent(Sci::Line line) const {
512 const int level = LevelNumber(GetLevel(line));
513 Sci::Line lineLook = line - 1;
514 while ((lineLook > 0) && (
515 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
516 (LevelNumber(GetLevel(lineLook)) >= level))
518 lineLook--;
520 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
521 (LevelNumber(GetLevel(lineLook)) < level)) {
522 return lineLook;
523 } else {
524 return -1;
// Fill highlightDelimiter with the fold block around `line`:
// beginFoldBlock, endFoldBlock, firstChangeableLineBefore and
// firstChangeableLineAfter. When no enclosing fold header can be found,
// highlightDelimiter is cleared and the function returns early.
// lastLine (or `line` if that is larger), plus one, is handed to
// GetLastChild as its lastLine argument.
// NOTE(review): lines that held only closing braces are absent from this
// listing, so the block nesting is inferred - confirm against upstream.
528 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
529 const int level = GetLevel(line);
530 const Sci::Line lookLastLine = std::max(line, lastLine) + 1;
// Step backwards over white lines, and over header lines whose level number
// is not below that of the line following them.
532 Sci::Line lookLine = line;
533 int lookLineLevel = level;
534 int lookLineLevelNum = LevelNumber(lookLineLevel);
535 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
536 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= LevelNumber(GetLevel(lookLine + 1)))))) {
537 lookLineLevel = GetLevel(--lookLine);
538 lookLineLevelNum = LevelNumber(lookLineLevel);
// The block starts at the line reached if it is a header, else at its fold parent.
541 Sci::Line beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
542 if (beginFoldBlock == -1) {
543 highlightDelimiter.Clear();
544 return;
547 Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
548 Sci::Line firstChangeableLineBefore = -1;
// The block found ends before `line`: search earlier headers for one whose
// last child is exactly `line`.
549 if (endFoldBlock < line) {
550 lookLine = beginFoldBlock - 1;
551 lookLineLevel = GetLevel(lookLine);
552 lookLineLevelNum = LevelNumber(lookLineLevel);
553 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
554 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
555 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
556 beginFoldBlock = lookLine;
557 endFoldBlock = line;
558 firstChangeableLineBefore = line - 1;
561 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && (LevelNumber(GetLevel(lookLine - 1)) > lookLineLevelNum))
562 break;
563 lookLineLevel = GetLevel(--lookLine);
564 lookLineLevelNum = LevelNumber(lookLineLevel);
// Scan backwards from the line above `line` to beginFoldBlock for the first
// white line or line with a level number above that of `line`.
567 if (firstChangeableLineBefore == -1) {
568 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
569 lookLine >= beginFoldBlock;
570 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
571 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > LevelNumber(level))) {
572 firstChangeableLineBefore = lookLine;
573 break;
// Fallback when nothing was found: the line before the block.
577 if (firstChangeableLineBefore == -1)
578 firstChangeableLineBefore = beginFoldBlock - 1;
// Scan forwards from the line below `line` to endFoldBlock for the first
// header whose level number is below that of the line following it.
580 Sci::Line firstChangeableLineAfter = -1;
581 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
582 lookLine <= endFoldBlock;
583 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
584 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < LevelNumber(GetLevel(lookLine + 1)))) {
585 firstChangeableLineAfter = lookLine;
586 break;
// Fallback when nothing was found: the line after the block.
589 if (firstChangeableLineAfter == -1)
590 firstChangeableLineAfter = endFoldBlock + 1;
592 highlightDelimiter.beginFoldBlock = beginFoldBlock;
593 highlightDelimiter.endFoldBlock = endFoldBlock;
594 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
595 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
598 Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const {
599 return Sci::clamp(pos, static_cast<Sci::Position>(0), static_cast<Sci::Position>(Length()));
602 bool Document::IsCrLf(Sci::Position pos) const {
603 if (pos < 0)
604 return false;
605 if (pos >= (Length() - 1))
606 return false;
607 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
610 int Document::LenChar(Sci::Position pos) {
611 if (pos < 0) {
612 return 1;
613 } else if (IsCrLf(pos)) {
614 return 2;
615 } else if (SC_CP_UTF8 == dbcsCodePage) {
616 const unsigned char leadByte = cb.UCharAt(pos);
617 const int widthCharBytes = UTF8BytesOfLead[leadByte];
618 const Sci::Position lengthDoc = Length();
619 if ((pos + widthCharBytes) > lengthDoc)
620 return static_cast<int>(lengthDoc - pos);
621 else
622 return widthCharBytes;
623 } else if (dbcsCodePage) {
624 return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
625 } else {
626 return 1;
630 bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
631 Sci::Position trail = pos;
632 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
633 trail--;
634 start = (trail > 0) ? trail-1 : trail;
636 const unsigned char leadByte = cb.UCharAt(start);
637 const int widthCharBytes = UTF8BytesOfLead[leadByte];
638 if (widthCharBytes == 1) {
639 return false;
640 } else {
641 const int trailBytes = widthCharBytes - 1;
642 const Sci::Position len = pos - start;
643 if (len > trailBytes)
644 // pos too far from lead
645 return false;
646 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
647 for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
648 charBytes[b] = cb.CharAt(start+b);
649 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
650 if (utf8status & UTF8MaskInvalid)
651 return false;
652 end = start + widthCharBytes;
653 return true;
657 // Normalise a position so that it is not halfway through a two byte character.
658 // This can occur in two situations -
659 // When lines are terminated with \r\n pairs which should be treated as one character.
660 // When displaying DBCS text such as Japanese.
661 // If moving, move the position in the indicated direction.
// pos: position to normalise; values <= 0 give 0 and values >= Length()
//   give Length().
// moveDir: > 0 moves to the end of the straddled character, otherwise to
//   its start.
// checkLineEnd: when true, a position between CR and LF is also moved.
// Returns the adjusted position, or pos itself when it is already on a
// character boundary (or sits on an isolated, invalid UTF-8 trail byte).
// NOTE(review): lines that held only closing braces are absent from this
// listing, so the block nesting is inferred - confirm against upstream.
662 Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const {
663 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
664 // If out of range, just return minimum/maximum value.
665 if (pos <= 0)
666 return 0;
667 if (pos >= Length())
668 return Length();
670 // PLATFORM_ASSERT(pos > 0 && pos < Length());
// pos - 1 is the CR of a CR+LF pair, so pos sits between the two bytes.
671 if (checkLineEnd && IsCrLf(pos - 1)) {
672 if (moveDir > 0)
673 return pos + 1;
674 else
675 return pos - 1;
678 if (dbcsCodePage) {
679 if (SC_CP_UTF8 == dbcsCodePage) {
680 const unsigned char ch = cb.UCharAt(pos);
681 // If ch is not a trail byte then pos is valid intercharacter position
682 if (UTF8IsTrailByte(ch)) {
683 Sci::Position startUTF = pos;
684 Sci::Position endUTF = pos;
685 if (InGoodUTF8(pos, startUTF, endUTF)) {
686 // ch is a trail byte within a UTF-8 character
687 if (moveDir > 0)
688 pos = endUTF;
689 else
690 pos = startUTF;
692 // Else invalid UTF-8 so return position of isolated trail byte
694 } else {
695 // Anchor DBCS calculations at start of line because start of line can
696 // not be a DBCS trail byte.
697 const Sci::Position posStartLine = LineStart(LineFromPosition(pos));
698 if (pos == posStartLine)
699 return pos;
701 // Step back until a non-lead-byte is found.
702 Sci::Position posCheck = pos;
703 while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
704 posCheck--;
706 // Check from known start of character.
// Walk forward character by character; if a character ends exactly at pos
// then pos is a boundary, and if one straddles pos, snap to its start or end.
707 while (posCheck < pos) {
708 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1;
709 if (posCheck + mbsize == pos) {
710 return pos;
711 } else if (posCheck + mbsize > pos) {
712 if (moveDir > 0) {
713 return posCheck + mbsize;
714 } else {
715 return posCheck;
718 posCheck += mbsize;
723 return pos;
726 // NextPosition moves between valid positions - it can not handle a position in the middle of a
727 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
728 // A \r\n pair is treated as two characters.
// pos: a valid inter-character position.
// moveDir: > 0 steps forward one character, anything else steps backward.
// Returns the neighbouring position, limited to the range [0, cb.Length()].
// In UTF-8, an invalid sequence is stepped over one byte at a time.
// NOTE(review): lines that held only punctuation (closing braces, and what
// looks like the empty body of the loop on source line 791) are absent
// from this listing - confirm the nesting against upstream.
729 Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
730 // If out of range, just return minimum/maximum value.
731 const int increment = (moveDir > 0) ? 1 : -1;
732 if (pos + increment <= 0)
733 return 0;
734 if (pos + increment >= cb.Length())
735 return cb.Length();
737 if (dbcsCodePage) {
738 if (SC_CP_UTF8 == dbcsCodePage) {
739 if (increment == 1) {
740 // Simple forward movement case so can avoid some checks
741 const unsigned char leadByte = cb.UCharAt(pos);
742 if (UTF8IsAscii(leadByte)) {
743 // Single byte character or invalid
744 pos++;
745 } else {
// Collect as many bytes as the lead byte announces and classify them.
746 const int widthCharBytes = UTF8BytesOfLead[leadByte];
747 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
748 for (int b=1; b<widthCharBytes; b++)
749 charBytes[b] = cb.CharAt(pos+b);
750 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
751 if (utf8status & UTF8MaskInvalid)
752 pos++;
753 else
754 pos += utf8status & UTF8MaskWidth;
756 } else {
757 // Examine byte before position
758 pos--;
759 const unsigned char ch = cb.UCharAt(pos);
760 // If ch is not a trail byte then pos is valid intercharacter position
761 if (UTF8IsTrailByte(ch)) {
762 // If ch is a trail byte in a valid UTF-8 character then return start of character
763 Sci::Position startUTF = pos;
764 Sci::Position endUTF = pos;
765 if (InGoodUTF8(pos, startUTF, endUTF)) {
766 pos = startUTF;
768 // Else invalid UTF-8 so return position of isolated trail byte
771 } else {
772 if (moveDir > 0) {
// DBCS forward: a lead byte means a two byte character; never pass the end.
773 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
774 pos += mbsize;
775 if (pos > cb.Length())
776 pos = cb.Length();
777 } else {
778 // Anchor DBCS calculations at start of line because start of line can
779 // not be a DBCS trail byte.
780 const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
781 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
782 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
783 if ((pos - 1) <= posStartLine) {
784 return pos - 1;
785 } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
786 // Must actually be trail byte
787 return pos - 2;
788 } else {
789 // Otherwise, step back until a non-lead-byte is found.
790 Sci::Position posTemp = pos - 1;
791 while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
793 // Now posTemp+1 must point to the beginning of a character,
794 // so figure out whether we went back an even or an odd
795 // number of bytes and go back 1 or 2 bytes, respectively.
796 return (pos - 1 - ((pos - posTemp) & 1));
800 } else {
// Single-byte encoding: every byte is a character.
801 pos += increment;
804 return pos;
807 bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
808 // Returns true if pos changed
809 Sci::Position posNext = NextPosition(pos, moveDir);
810 if (posNext == pos) {
811 return false;
812 } else {
813 pos = posNext;
814 return true;
818 Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const {
819 if (position >= Length()) {
820 return CharacterExtracted(unicodeReplacementChar, 0);
822 const unsigned char leadByte = cb.UCharAt(position);
823 if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
824 // Common case: ASCII character
825 return CharacterExtracted(leadByte, 1);
827 if (SC_CP_UTF8 == dbcsCodePage) {
828 const int widthCharBytes = UTF8BytesOfLead[leadByte];
829 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
830 for (int b = 1; b<widthCharBytes; b++)
831 charBytes[b] = cb.UCharAt(position + b);
832 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
833 if (utf8status & UTF8MaskInvalid) {
834 // Treat as invalid and use up just one byte
835 return CharacterExtracted(unicodeReplacementChar, 1);
836 } else {
837 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
839 } else {
840 if (IsDBCSLeadByteNoExcept(leadByte) && ((position + 1) < Length())) {
841 return CharacterExtracted::DBCS(leadByte, cb.UCharAt(position + 1));
842 } else {
843 return CharacterExtracted(leadByte, 1);
848 Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const {
849 if (position <= 0) {
850 return CharacterExtracted(unicodeReplacementChar, 0);
852 const unsigned char previousByte = cb.UCharAt(position - 1);
853 if (0 == dbcsCodePage) {
854 return CharacterExtracted(previousByte, 1);
856 if (SC_CP_UTF8 == dbcsCodePage) {
857 if (UTF8IsAscii(previousByte)) {
858 return CharacterExtracted(previousByte, 1);
860 position--;
861 // If previousByte is not a trail byte then its invalid
862 if (UTF8IsTrailByte(previousByte)) {
863 // If previousByte is a trail byte in a valid UTF-8 character then find start of character
864 Sci::Position startUTF = position;
865 Sci::Position endUTF = position;
866 if (InGoodUTF8(position, startUTF, endUTF)) {
867 const int widthCharBytes = static_cast<int>(endUTF - startUTF);
868 unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
869 for (int b = 0; b<widthCharBytes; b++)
870 charBytes[b] = cb.UCharAt(startUTF + b);
871 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
872 if (utf8status & UTF8MaskInvalid) {
873 // Treat as invalid and use up just one byte
874 return CharacterExtracted(unicodeReplacementChar, 1);
875 } else {
876 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
879 // Else invalid UTF-8 so return position of isolated trail byte
881 return CharacterExtracted(unicodeReplacementChar, 1);
882 } else {
883 // Moving backwards in DBCS is complex so use NextPosition
884 const Sci::Position posStartCharacter = NextPosition(position, -1);
885 return CharacterAfter(posStartCharacter);
889 // Return -1 on out-of-bounds
890 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
891 Sci::Position pos = positionStart;
892 if (dbcsCodePage) {
893 const int increment = (characterOffset > 0) ? 1 : -1;
894 while (characterOffset != 0) {
895 const Sci::Position posNext = NextPosition(pos, increment);
896 if (posNext == pos)
897 return INVALID_POSITION;
898 pos = posNext;
899 characterOffset -= increment;
901 } else {
902 pos = positionStart + characterOffset;
903 if ((pos < 0) || (pos > Length()))
904 return INVALID_POSITION;
906 return pos;
909 Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const {
910 Sci::Position pos = positionStart;
911 if (dbcsCodePage) {
912 const int increment = (characterOffset > 0) ? 1 : -1;
913 while (characterOffset != 0) {
914 const Sci::Position posNext = NextPosition(pos, increment);
915 if (posNext == pos)
916 return INVALID_POSITION;
917 if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
918 characterOffset -= increment;
919 pos = posNext;
920 characterOffset -= increment;
922 } else {
923 pos = positionStart + characterOffset;
924 if ((pos < 0) || (pos > Length()))
925 return INVALID_POSITION;
927 return pos;
930 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
931 int character;
932 int bytesInCharacter = 1;
933 const unsigned char leadByte = cb.UCharAt(position);
934 if (dbcsCodePage) {
935 if (SC_CP_UTF8 == dbcsCodePage) {
936 if (UTF8IsAscii(leadByte)) {
937 // Single byte character or invalid
938 character = leadByte;
939 } else {
940 const int widthCharBytes = UTF8BytesOfLead[leadByte];
941 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
942 for (int b=1; b<widthCharBytes; b++)
943 charBytes[b] = cb.UCharAt(position+b);
944 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
945 if (utf8status & UTF8MaskInvalid) {
946 // Report as singleton surrogate values which are invalid Unicode
947 character = 0xDC80 + leadByte;
948 } else {
949 bytesInCharacter = utf8status & UTF8MaskWidth;
950 character = UnicodeFromUTF8(charBytes);
953 } else {
954 if (IsDBCSLeadByteNoExcept(leadByte)) {
955 bytesInCharacter = 2;
956 character = (leadByte << 8) | cb.UCharAt(position+1);
957 } else {
958 character = leadByte;
961 } else {
962 character = leadByte;
964 if (pWidth) {
965 *pWidth = bytesInCharacter;
967 return character;
970 int SCI_METHOD Document::CodePage() const {
971 return dbcsCodePage;
974 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
975 // Used by lexers so must match IDocument method exactly
976 return IsDBCSLeadByteNoExcept(ch);
979 bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
980 // Used inside core Scintilla
981 // Byte ranges found in Wikipedia articles with relevant search strings in each case
982 const unsigned char uch = ch;
983 switch (dbcsCodePage) {
984 case 932:
985 // Shift_jis
986 return ((uch >= 0x81) && (uch <= 0x9F)) ||
987 ((uch >= 0xE0) && (uch <= 0xFC));
988 // Lead bytes F0 to FC may be a Microsoft addition.
989 case 936:
990 // GBK
991 return (uch >= 0x81) && (uch <= 0xFE);
992 case 949:
993 // Korean Wansung KS C-5601-1987
994 return (uch >= 0x81) && (uch <= 0xFE);
995 case 950:
996 // Big5
997 return (uch >= 0x81) && (uch <= 0xFE);
998 case 1361:
999 // Korean Johab KS C-5601-1992
1000 return
1001 ((uch >= 0x84) && (uch <= 0xD3)) ||
1002 ((uch >= 0xD8) && (uch <= 0xDE)) ||
1003 ((uch >= 0xE0) && (uch <= 0xF9));
1005 return false;
1008 bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept {
1009 const unsigned char lead = ch;
1010 switch (dbcsCodePage) {
1011 case 932:
1012 // Shift_jis
1013 return
1014 (lead == 0x85) ||
1015 (lead == 0x86) ||
1016 (lead == 0xEB) ||
1017 (lead == 0xEC) ||
1018 (lead == 0xEF) ||
1019 (lead == 0xFA) ||
1020 (lead == 0xFB) ||
1021 (lead == 0xFC);
1022 case 936:
1023 // GBK
1024 return (lead == 0x80) || (lead == 0xFF);
1025 case 949:
1026 // Korean Wansung KS C-5601-1987
1027 return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE);
1028 case 950:
1029 // Big5
1030 return
1031 ((lead >= 0x80) && (lead <= 0xA0)) ||
1032 (lead == 0xC8) ||
1033 (lead >= 0xFA);
1034 case 1361:
1035 // Korean Johab KS C-5601-1992
1036 return
1037 ((lead >= 0x80) && (lead <= 0x83)) ||
1038 ((lead >= 0xD4) && (lead <= 0xD8)) ||
1039 (lead == 0xDF) ||
1040 (lead >= 0xFA);
1042 return false;
1045 bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept {
1046 const unsigned char trail = ch;
1047 switch (dbcsCodePage) {
1048 case 932:
1049 // Shift_jis
1050 return
1051 (trail <= 0x3F) ||
1052 (trail == 0x7F) ||
1053 (trail >= 0xFD);
1054 case 936:
1055 // GBK
1056 return
1057 (trail <= 0x3F) ||
1058 (trail == 0x7F) ||
1059 (trail == 0xFF);
1060 case 949:
1061 // Korean Wansung KS C-5601-1987
1062 return
1063 (trail <= 0x40) ||
1064 ((trail >= 0x5B) && (trail <= 0x60)) ||
1065 ((trail >= 0x7B) && (trail <= 0x80)) ||
1066 (trail == 0xFF);
1067 case 950:
1068 // Big5
1069 return
1070 (trail <= 0x3F) ||
1071 ((trail >= 0x7F) && (trail <= 0xA0)) ||
1072 (trail == 0xFF);
1073 case 1361:
1074 // Korean Johab KS C-5601-1992
1075 return
1076 (trail <= 0x30) ||
1077 (trail == 0x7F) ||
1078 (trail == 0x80) ||
1079 (trail == 0xFF);
1081 return false;
1084 int Document::DBCSDrawBytes(const char *text, int len) const noexcept {
1085 if (len <= 1) {
1086 return len;
1088 if (IsDBCSLeadByteNoExcept(text[0])) {
1089 return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2;
1090 } else {
1091 return 1;
/// True only for the space and horizontal tab characters.
static constexpr bool IsSpaceOrTab(int ch) noexcept {
	return (ch == '\t') || (ch == ' ');
}
1099 // Need to break text into segments near lengthSegment but taking into
1100 // account the encoding to not break inside a UTF-8 or DBCS character
1101 // and also trying to avoid breaking inside a pair of combining characters.
1102 // The segment length must always be long enough (more than 4 bytes)
1103 // so that there will be at least one whole character to make a segment.
1104 // For UTF-8, text must consist only of valid whole characters.
1105 // In preference order from best to worst:
1106 // 1) Break after space
1107 // 2) Break before punctuation
1108 // 3) Break after whole character
1110 int Document::SafeSegment(const char *text, int length, int lengthSegment) const noexcept {
1111 if (length <= lengthSegment)
1112 return length;
1113 int lastSpaceBreak = -1;
1114 int lastPunctuationBreak = -1;
1115 int lastEncodingAllowedBreak = 0;
1116 for (int j=0; j < lengthSegment;) {
1117 const unsigned char ch = text[j];
1118 if (j > 0) {
1119 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
1120 lastSpaceBreak = j;
1122 if (ch < 'A') {
1123 lastPunctuationBreak = j;
1126 lastEncodingAllowedBreak = j;
1128 if (dbcsCodePage == SC_CP_UTF8) {
1129 j += UTF8BytesOfLead[ch];
1130 } else if (dbcsCodePage) {
1131 j += IsDBCSLeadByteNoExcept(ch) ? 2 : 1;
1132 } else {
1133 j++;
1136 if (lastSpaceBreak >= 0) {
1137 return lastSpaceBreak;
1138 } else if (lastPunctuationBreak >= 0) {
1139 return lastPunctuationBreak;
1141 return lastEncodingAllowedBreak;
1144 EncodingFamily Document::CodePageFamily() const noexcept {
1145 if (SC_CP_UTF8 == dbcsCodePage)
1146 return efUnicode;
1147 else if (dbcsCodePage)
1148 return efDBCS;
1149 else
1150 return efEightBit;
1153 void Document::ModifiedAt(Sci::Position pos) noexcept {
1154 if (endStyled > pos)
1155 endStyled = pos;
1158 void Document::CheckReadOnly() {
1159 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
1160 enteredReadOnlyCount++;
1161 NotifyModifyAttempt();
1162 enteredReadOnlyCount--;
1166 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1167 // SetStyleAt does not change the persistent state of a document
1169 bool Document::DeleteChars(Sci::Position pos, Sci::Position len) {
1170 if (pos < 0)
1171 return false;
1172 if (len <= 0)
1173 return false;
1174 if ((pos + len) > Length())
1175 return false;
1176 CheckReadOnly();
1177 if (enteredModification != 0) {
1178 return false;
1179 } else {
1180 enteredModification++;
1181 if (!cb.IsReadOnly()) {
1182 NotifyModified(
1183 DocModification(
1184 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
1185 pos, len,
1186 0, 0));
1187 const Sci::Line prevLinesTotal = LinesTotal();
1188 const bool startSavePoint = cb.IsSavePoint();
1189 bool startSequence = false;
1190 const char *text = cb.DeleteChars(pos, len, startSequence);
1191 if (startSavePoint && cb.IsCollectingUndo())
1192 NotifySavePoint(!startSavePoint);
1193 if ((pos < Length()) || (pos == 0))
1194 ModifiedAt(pos);
1195 else
1196 ModifiedAt(pos-1);
1197 NotifyModified(
1198 DocModification(
1199 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1200 pos, len,
1201 LinesTotal() - prevLinesTotal, text));
1203 enteredModification--;
1205 return !cb.IsReadOnly();
1209 * Insert a string with a length.
1211 Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
1212 if (insertLength <= 0) {
1213 return 0;
1215 CheckReadOnly(); // Application may change read only state here
1216 if (cb.IsReadOnly()) {
1217 return 0;
1219 if (enteredModification != 0) {
1220 return 0;
1222 enteredModification++;
1223 insertionSet = false;
1224 insertion.clear();
1225 NotifyModified(
1226 DocModification(
1227 SC_MOD_INSERTCHECK,
1228 position, insertLength,
1229 0, s));
1230 if (insertionSet) {
1231 s = insertion.c_str();
1232 insertLength = insertion.length();
1234 NotifyModified(
1235 DocModification(
1236 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1237 position, insertLength,
1238 0, s));
1239 const Sci::Line prevLinesTotal = LinesTotal();
1240 const bool startSavePoint = cb.IsSavePoint();
1241 bool startSequence = false;
1242 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1243 if (startSavePoint && cb.IsCollectingUndo())
1244 NotifySavePoint(!startSavePoint);
1245 ModifiedAt(position);
1246 NotifyModified(
1247 DocModification(
1248 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1249 position, insertLength,
1250 LinesTotal() - prevLinesTotal, text));
1251 if (insertionSet) { // Free memory as could be large
1252 std::string().swap(insertion);
1254 enteredModification--;
1255 return insertLength;
1258 void Document::ChangeInsertion(const char *s, Sci::Position length) {
1259 insertionSet = true;
1260 insertion.assign(s, length);
1263 int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1264 try {
1265 const Sci::Position position = Length();
1266 InsertString(position, data, length);
1267 } catch (std::bad_alloc &) {
1268 return SC_STATUS_BADALLOC;
1269 } catch (...) {
1270 return SC_STATUS_FAILURE;
1272 return 0;
1275 void * SCI_METHOD Document::ConvertToDocument() {
1276 return this;
1279 Sci::Position Document::Undo() {
1280 Sci::Position newPos = -1;
1281 CheckReadOnly();
1282 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1283 enteredModification++;
1284 if (!cb.IsReadOnly()) {
1285 const bool startSavePoint = cb.IsSavePoint();
1286 bool multiLine = false;
1287 const int steps = cb.StartUndo();
1288 //Platform::DebugPrintf("Steps=%d\n", steps);
1289 Sci::Position coalescedRemovePos = -1;
1290 Sci::Position coalescedRemoveLen = 0;
1291 Sci::Position prevRemoveActionPos = -1;
1292 Sci::Position prevRemoveActionLen = 0;
1293 for (int step = 0; step < steps; step++) {
1294 const Sci::Line prevLinesTotal = LinesTotal();
1295 const Action &action = cb.GetUndoStep();
1296 if (action.at == removeAction) {
1297 NotifyModified(DocModification(
1298 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1299 } else if (action.at == containerAction) {
1300 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1301 dm.token = action.position;
1302 NotifyModified(dm);
1303 if (!action.mayCoalesce) {
1304 coalescedRemovePos = -1;
1305 coalescedRemoveLen = 0;
1306 prevRemoveActionPos = -1;
1307 prevRemoveActionLen = 0;
1309 } else {
1310 NotifyModified(DocModification(
1311 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1313 cb.PerformUndoStep();
1314 if (action.at != containerAction) {
1315 ModifiedAt(action.position);
1316 newPos = action.position;
1319 int modFlags = SC_PERFORMED_UNDO;
1320 // With undo, an insertion action becomes a deletion notification
1321 if (action.at == removeAction) {
1322 newPos += action.lenData;
1323 modFlags |= SC_MOD_INSERTTEXT;
1324 if ((coalescedRemoveLen > 0) &&
1325 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1326 coalescedRemoveLen += action.lenData;
1327 newPos = coalescedRemovePos + coalescedRemoveLen;
1328 } else {
1329 coalescedRemovePos = action.position;
1330 coalescedRemoveLen = action.lenData;
1332 prevRemoveActionPos = action.position;
1333 prevRemoveActionLen = action.lenData;
1334 } else if (action.at == insertAction) {
1335 modFlags |= SC_MOD_DELETETEXT;
1336 coalescedRemovePos = -1;
1337 coalescedRemoveLen = 0;
1338 prevRemoveActionPos = -1;
1339 prevRemoveActionLen = 0;
1341 if (steps > 1)
1342 modFlags |= SC_MULTISTEPUNDOREDO;
1343 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1344 if (linesAdded != 0)
1345 multiLine = true;
1346 if (step == steps - 1) {
1347 modFlags |= SC_LASTSTEPINUNDOREDO;
1348 if (multiLine)
1349 modFlags |= SC_MULTILINEUNDOREDO;
1351 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1352 linesAdded, action.data.get()));
1355 const bool endSavePoint = cb.IsSavePoint();
1356 if (startSavePoint != endSavePoint)
1357 NotifySavePoint(endSavePoint);
1359 enteredModification--;
1361 return newPos;
1364 Sci::Position Document::Redo() {
1365 Sci::Position newPos = -1;
1366 CheckReadOnly();
1367 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1368 enteredModification++;
1369 if (!cb.IsReadOnly()) {
1370 const bool startSavePoint = cb.IsSavePoint();
1371 bool multiLine = false;
1372 const int steps = cb.StartRedo();
1373 for (int step = 0; step < steps; step++) {
1374 const Sci::Line prevLinesTotal = LinesTotal();
1375 const Action &action = cb.GetRedoStep();
1376 if (action.at == insertAction) {
1377 NotifyModified(DocModification(
1378 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1379 } else if (action.at == containerAction) {
1380 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1381 dm.token = action.position;
1382 NotifyModified(dm);
1383 } else {
1384 NotifyModified(DocModification(
1385 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1387 cb.PerformRedoStep();
1388 if (action.at != containerAction) {
1389 ModifiedAt(action.position);
1390 newPos = action.position;
1393 int modFlags = SC_PERFORMED_REDO;
1394 if (action.at == insertAction) {
1395 newPos += action.lenData;
1396 modFlags |= SC_MOD_INSERTTEXT;
1397 } else if (action.at == removeAction) {
1398 modFlags |= SC_MOD_DELETETEXT;
1400 if (steps > 1)
1401 modFlags |= SC_MULTISTEPUNDOREDO;
1402 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1403 if (linesAdded != 0)
1404 multiLine = true;
1405 if (step == steps - 1) {
1406 modFlags |= SC_LASTSTEPINUNDOREDO;
1407 if (multiLine)
1408 modFlags |= SC_MULTILINEUNDOREDO;
1410 NotifyModified(
1411 DocModification(modFlags, action.position, action.lenData,
1412 linesAdded, action.data.get()));
1415 const bool endSavePoint = cb.IsSavePoint();
1416 if (startSavePoint != endSavePoint)
1417 NotifySavePoint(endSavePoint);
1419 enteredModification--;
1421 return newPos;
1424 void Document::DelChar(Sci::Position pos) {
1425 DeleteChars(pos, LenChar(pos));
1428 void Document::DelCharBack(Sci::Position pos) {
1429 if (pos <= 0) {
1430 return;
1431 } else if (IsCrLf(pos - 2)) {
1432 DeleteChars(pos - 2, 2);
1433 } else if (dbcsCodePage) {
1434 const Sci::Position startChar = NextPosition(pos, -1);
1435 DeleteChars(startChar, pos - startChar);
1436 } else {
1437 DeleteChars(pos - 1, 1);
1441 static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1442 return ((pos / tabSize) + 1) * tabSize;
1445 static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1446 std::string indentation;
1447 if (!insertSpaces) {
1448 while (indent >= tabSize) {
1449 indentation += '\t';
1450 indent -= tabSize;
1453 while (indent > 0) {
1454 indentation += ' ';
1455 indent--;
1457 return indentation;
1460 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1461 int indent = 0;
1462 if ((line >= 0) && (line < LinesTotal())) {
1463 const Sci::Position lineStart = LineStart(line);
1464 const Sci::Position length = Length();
1465 for (Sci::Position i = lineStart; i < length; i++) {
1466 const char ch = cb.CharAt(i);
1467 if (ch == ' ')
1468 indent++;
1469 else if (ch == '\t')
1470 indent = static_cast<int>(NextTab(indent, tabInChars));
1471 else
1472 return indent;
1475 return indent;
1478 Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) {
1479 const int indentOfLine = GetLineIndentation(line);
1480 if (indent < 0)
1481 indent = 0;
1482 if (indent != indentOfLine) {
1483 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1484 const Sci::Position thisLineStart = LineStart(line);
1485 const Sci::Position indentPos = GetLineIndentPosition(line);
1486 UndoGroup ug(this);
1487 DeleteChars(thisLineStart, indentPos - thisLineStart);
1488 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1489 linebuf.length());
1490 } else {
1491 return GetLineIndentPosition(line);
1495 Sci::Position Document::GetLineIndentPosition(Sci::Line line) const {
1496 if (line < 0)
1497 return 0;
1498 Sci::Position pos = LineStart(line);
1499 const Sci::Position length = Length();
1500 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1501 pos++;
1503 return pos;
1506 Sci::Position Document::GetColumn(Sci::Position pos) {
1507 Sci::Position column = 0;
1508 const Sci::Line line = SciLineFromPosition(pos);
1509 if ((line >= 0) && (line < LinesTotal())) {
1510 for (Sci::Position i = LineStart(line); i < pos;) {
1511 const char ch = cb.CharAt(i);
1512 if (ch == '\t') {
1513 column = NextTab(column, tabInChars);
1514 i++;
1515 } else if (ch == '\r') {
1516 return column;
1517 } else if (ch == '\n') {
1518 return column;
1519 } else if (i >= Length()) {
1520 return column;
1521 } else {
1522 column++;
1523 i = NextPosition(i, 1);
1527 return column;
1530 Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const {
1531 startPos = MovePositionOutsideChar(startPos, 1, false);
1532 endPos = MovePositionOutsideChar(endPos, -1, false);
1533 Sci::Position count = 0;
1534 Sci::Position i = startPos;
1535 while (i < endPos) {
1536 count++;
1537 i = NextPosition(i, 1);
1539 return count;
1542 Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const {
1543 startPos = MovePositionOutsideChar(startPos, 1, false);
1544 endPos = MovePositionOutsideChar(endPos, -1, false);
1545 Sci::Position count = 0;
1546 Sci::Position i = startPos;
1547 while (i < endPos) {
1548 count++;
1549 const Sci::Position next = NextPosition(i, 1);
1550 if ((next - i) > 3)
1551 count++;
1552 i = next;
1554 return count;
1557 Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) {
1558 Sci::Position position = LineStart(line);
1559 if ((line >= 0) && (line < LinesTotal())) {
1560 Sci::Position columnCurrent = 0;
1561 while ((columnCurrent < column) && (position < Length())) {
1562 const char ch = cb.CharAt(position);
1563 if (ch == '\t') {
1564 columnCurrent = NextTab(columnCurrent, tabInChars);
1565 if (columnCurrent > column)
1566 return position;
1567 position++;
1568 } else if (ch == '\r') {
1569 return position;
1570 } else if (ch == '\n') {
1571 return position;
1572 } else {
1573 columnCurrent++;
1574 position = NextPosition(position, 1);
1578 return position;
1581 void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1582 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1583 for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1584 const Sci::Position indentOfLine = GetLineIndentation(line);
1585 if (forwards) {
1586 if (LineStart(line) < LineEnd(line)) {
1587 SetLineIndentation(line, indentOfLine + IndentSize());
1589 } else {
1590 SetLineIndentation(line, indentOfLine - IndentSize());
1595 // Convert line endings for a piece of text to a particular mode.
1596 // Stop at len or when a NUL is found.
1597 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1598 std::string dest;
1599 for (size_t i = 0; (i < len) && (s[i]); i++) {
1600 if (s[i] == '\n' || s[i] == '\r') {
1601 if (eolModeWanted == SC_EOL_CR) {
1602 dest.push_back('\r');
1603 } else if (eolModeWanted == SC_EOL_LF) {
1604 dest.push_back('\n');
1605 } else { // eolModeWanted == SC_EOL_CRLF
1606 dest.push_back('\r');
1607 dest.push_back('\n');
1609 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1610 i++;
1612 } else {
1613 dest.push_back(s[i]);
1616 return dest;
1619 void Document::ConvertLineEnds(int eolModeSet) {
1620 UndoGroup ug(this);
1622 for (Sci::Position pos = 0; pos < Length(); pos++) {
1623 if (cb.CharAt(pos) == '\r') {
1624 if (cb.CharAt(pos + 1) == '\n') {
1625 // CRLF
1626 if (eolModeSet == SC_EOL_CR) {
1627 DeleteChars(pos + 1, 1); // Delete the LF
1628 } else if (eolModeSet == SC_EOL_LF) {
1629 DeleteChars(pos, 1); // Delete the CR
1630 } else {
1631 pos++;
1633 } else {
1634 // CR
1635 if (eolModeSet == SC_EOL_CRLF) {
1636 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1637 } else if (eolModeSet == SC_EOL_LF) {
1638 pos += InsertString(pos, "\n", 1); // Insert LF
1639 DeleteChars(pos, 1); // Delete CR
1640 pos--;
1643 } else if (cb.CharAt(pos) == '\n') {
1644 // LF
1645 if (eolModeSet == SC_EOL_CRLF) {
1646 pos += InsertString(pos, "\r", 1); // Insert CR
1647 } else if (eolModeSet == SC_EOL_CR) {
1648 pos += InsertString(pos, "\r", 1); // Insert CR
1649 DeleteChars(pos, 1); // Delete LF
1650 pos--;
1657 int Document::Options() const {
1658 return (IsLarge() ? SC_DOCUMENTOPTION_TEXT_LARGE : 0) |
1659 (cb.HasStyles() ? 0 : SC_DOCUMENTOPTION_STYLES_NONE);
1662 bool Document::IsWhiteLine(Sci::Line line) const {
1663 Sci::Position currentChar = LineStart(line);
1664 const Sci::Position endLine = LineEnd(line);
1665 while (currentChar < endLine) {
1666 if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1667 return false;
1669 ++currentChar;
1671 return true;
1674 Sci::Position Document::ParaUp(Sci::Position pos) const {
1675 Sci::Line line = SciLineFromPosition(pos);
1676 line--;
1677 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1678 line--;
1680 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1681 line--;
1683 line++;
1684 return LineStart(line);
1687 Sci::Position Document::ParaDown(Sci::Position pos) const {
1688 Sci::Line line = SciLineFromPosition(pos);
1689 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1690 line++;
1692 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1693 line++;
1695 if (line < LinesTotal())
1696 return LineStart(line);
1697 else // end of a document
1698 return LineEnd(line-1);
1701 bool Document::IsASCIIWordByte(unsigned char ch) const {
1702 if (IsASCII(ch)) {
1703 return charClass.GetClass(ch) == CharClassify::ccWord;
1704 } else {
1705 return false;
1709 CharClassify::cc Document::WordCharacterClass(unsigned int ch) const {
1710 if (dbcsCodePage && (!UTF8IsAscii(ch))) {
1711 if (SC_CP_UTF8 == dbcsCodePage) {
1712 // Use hard coded Unicode class
1713 const CharacterCategory cc = charMap.CategoryFor(ch);
1714 switch (cc) {
1716 // Separator, Line/Paragraph
1717 case ccZl:
1718 case ccZp:
1719 return CharClassify::ccNewLine;
1721 // Separator, Space
1722 case ccZs:
1723 // Other
1724 case ccCc:
1725 case ccCf:
1726 case ccCs:
1727 case ccCo:
1728 case ccCn:
1729 return CharClassify::ccSpace;
1731 // Letter
1732 case ccLu:
1733 case ccLl:
1734 case ccLt:
1735 case ccLm:
1736 case ccLo:
1737 // Number
1738 case ccNd:
1739 case ccNl:
1740 case ccNo:
1741 // Mark - includes combining diacritics
1742 case ccMn:
1743 case ccMc:
1744 case ccMe:
1745 return CharClassify::ccWord;
1747 // Punctuation
1748 case ccPc:
1749 case ccPd:
1750 case ccPs:
1751 case ccPe:
1752 case ccPi:
1753 case ccPf:
1754 case ccPo:
1755 // Symbol
1756 case ccSm:
1757 case ccSc:
1758 case ccSk:
1759 case ccSo:
1760 return CharClassify::ccPunctuation;
1763 } else {
1764 // Asian DBCS
1765 return CharClassify::ccWord;
1768 return charClass.GetClass(static_cast<unsigned char>(ch));
1772 * Used by commmands that want to select whole words.
1773 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1775 Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1776 CharClassify::cc ccStart = CharClassify::ccWord;
1777 if (delta < 0) {
1778 if (!onlyWordCharacters) {
1779 const CharacterExtracted ce = CharacterBefore(pos);
1780 ccStart = WordCharacterClass(ce.character);
1782 while (pos > 0) {
1783 const CharacterExtracted ce = CharacterBefore(pos);
1784 if (WordCharacterClass(ce.character) != ccStart)
1785 break;
1786 pos -= ce.widthBytes;
1788 } else {
1789 if (!onlyWordCharacters && pos < Length()) {
1790 const CharacterExtracted ce = CharacterAfter(pos);
1791 ccStart = WordCharacterClass(ce.character);
1793 while (pos < Length()) {
1794 const CharacterExtracted ce = CharacterAfter(pos);
1795 if (WordCharacterClass(ce.character) != ccStart)
1796 break;
1797 pos += ce.widthBytes;
1800 return MovePositionOutsideChar(pos, delta, true);
1804 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1805 * (delta < 0).
1806 * This is looking for a transition between character classes although there is also some
1807 * additional movement to transit white space.
1808 * Used by cursor movement by word commands.
1810 Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
1811 if (delta < 0) {
1812 while (pos > 0) {
1813 const CharacterExtracted ce = CharacterBefore(pos);
1814 if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1815 break;
1816 pos -= ce.widthBytes;
1818 if (pos > 0) {
1819 CharacterExtracted ce = CharacterBefore(pos);
1820 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1821 while (pos > 0) {
1822 ce = CharacterBefore(pos);
1823 if (WordCharacterClass(ce.character) != ccStart)
1824 break;
1825 pos -= ce.widthBytes;
1828 } else {
1829 CharacterExtracted ce = CharacterAfter(pos);
1830 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1831 while (pos < Length()) {
1832 ce = CharacterAfter(pos);
1833 if (WordCharacterClass(ce.character) != ccStart)
1834 break;
1835 pos += ce.widthBytes;
1837 while (pos < Length()) {
1838 ce = CharacterAfter(pos);
1839 if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1840 break;
1841 pos += ce.widthBytes;
1844 return pos;
1848 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1849 * (delta < 0).
1850 * This is looking for a transition between character classes although there is also some
1851 * additional movement to transit white space.
1852 * Used by cursor movement by word commands.
1854 Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const {
1855 if (delta < 0) {
1856 if (pos > 0) {
1857 CharacterExtracted ce = CharacterBefore(pos);
1858 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1859 if (ccStart != CharClassify::ccSpace) {
1860 while (pos > 0) {
1861 ce = CharacterBefore(pos);
1862 if (WordCharacterClass(ce.character) != ccStart)
1863 break;
1864 pos -= ce.widthBytes;
1867 while (pos > 0) {
1868 ce = CharacterBefore(pos);
1869 if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1870 break;
1871 pos -= ce.widthBytes;
1874 } else {
1875 while (pos < Length()) {
1876 const CharacterExtracted ce = CharacterAfter(pos);
1877 if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1878 break;
1879 pos += ce.widthBytes;
1881 if (pos < Length()) {
1882 CharacterExtracted ce = CharacterAfter(pos);
1883 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1884 while (pos < Length()) {
1885 ce = CharacterAfter(pos);
1886 if (WordCharacterClass(ce.character) != ccStart)
1887 break;
1888 pos += ce.widthBytes;
1892 return pos;
1896 * Check that the character at the given position is a word or punctuation character and that
1897 * the previous character is of a different character class.
1899 bool Document::IsWordStartAt(Sci::Position pos) const {
1900 if (pos >= Length())
1901 return false;
1902 if (pos > 0) {
1903 const CharacterExtracted cePos = CharacterAfter(pos);
1904 const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1905 const CharacterExtracted cePrev = CharacterBefore(pos);
1906 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1907 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1908 (ccPos != ccPrev);
1910 return true;
1914 * Check that the character at the given position is a word or punctuation character and that
1915 * the next character is of a different character class.
1917 bool Document::IsWordEndAt(Sci::Position pos) const {
1918 if (pos <= 0)
1919 return false;
1920 if (pos < Length()) {
1921 const CharacterExtracted cePos = CharacterAfter(pos);
1922 const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1923 const CharacterExtracted cePrev = CharacterBefore(pos);
1924 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1925 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1926 (ccPrev != ccPos);
1928 return true;
1932 * Check that the given range is has transitions between character classes at both
1933 * ends and where the characters on the inside are word or punctuation characters.
1935 bool Document::IsWordAt(Sci::Position start, Sci::Position end) const {
1936 return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1939 bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
1940 return (!word && !wordStart) ||
1941 (word && IsWordAt(pos, pos + length)) ||
1942 (wordStart && IsWordStartAt(pos));
1945 bool Document::HasCaseFolder() const noexcept {
1946 return pcf != nullptr;
1949 void Document::SetCaseFolder(CaseFolder *pcf_) {
1950 pcf.reset(pcf_);
1953 Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
1954 const unsigned char leadByte = cb.UCharAt(position);
1955 if (UTF8IsAscii(leadByte)) {
1956 // Common case: ASCII character
1957 return CharacterExtracted(leadByte, 1);
1959 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1960 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1961 for (int b=1; b<widthCharBytes; b++)
1962 charBytes[b] = cb.UCharAt(position + b);
1963 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1964 if (utf8status & UTF8MaskInvalid) {
1965 // Treat as invalid and use up just one byte
1966 return CharacterExtracted(unicodeReplacementChar, 1);
1967 } else {
1968 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1973 * Find text in document, supporting both forward and backward
1974 * searches (just pass minPos > maxPos to do a backward search)
1975 * Has not been tested with backwards DBCS searches yet.
1977 Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
1978 int flags, Sci::Position *length) {
1979 if (*length <= 0)
1980 return minPos;
1981 const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
1982 const bool word = (flags & SCFIND_WHOLEWORD) != 0;
1983 const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
1984 const bool regExp = (flags & SCFIND_REGEXP) != 0;
1985 if (regExp) {
1986 if (!regex)
1987 regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
1988 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1989 } else {
1991 const bool forward = minPos <= maxPos;
1992 const int increment = forward ? 1 : -1;
1994 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1995 const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
1996 const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);
1998 // Compute actual search ranges needed
1999 const Sci::Position lengthFind = *length;
2001 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2002 const Sci::Position limitPos = std::max(startPos, endPos);
2003 Sci::Position pos = startPos;
2004 if (!forward) {
2005 // Back all of a character
2006 pos = NextPosition(pos, increment);
2008 if (caseSensitive) {
2009 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2010 const char charStartSearch = search[0];
2011 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2012 if (CharAt(pos) == charStartSearch) {
2013 bool found = (pos + lengthFind) <= limitPos;
2014 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
2015 found = CharAt(pos + indexSearch) == search[indexSearch];
2017 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2018 return pos;
2021 if (!NextCharacter(pos, increment))
2022 break;
2024 } else if (SC_CP_UTF8 == dbcsCodePage) {
2025 const size_t maxFoldingExpansion = 4;
2026 std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
2027 const size_t lenSearch =
2028 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2029 char bytes[UTF8MaxBytes + 1] = "";
2030 char folded[UTF8MaxBytes * maxFoldingExpansion + 1] = "";
2031 while (forward ? (pos < endPos) : (pos >= endPos)) {
2032 int widthFirstCharacter = 0;
2033 Sci::Position posIndexDocument = pos;
2034 size_t indexSearch = 0;
2035 bool characterMatches = true;
2036 for (;;) {
2037 const unsigned char leadByte = cb.UCharAt(posIndexDocument);
2038 bytes[0] = leadByte;
2039 int widthChar = 1;
2040 if (!UTF8IsAscii(leadByte)) {
2041 const int widthCharBytes = UTF8BytesOfLead[leadByte];
2042 for (int b=1; b<widthCharBytes; b++) {
2043 bytes[b] = cb.CharAt(posIndexDocument+b);
2045 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
2047 if (!widthFirstCharacter)
2048 widthFirstCharacter = widthChar;
2049 if ((posIndexDocument + widthChar) > limitPos)
2050 break;
2051 const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2052 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2053 assert((indexSearch + lenFlat) <= searchThing.size());
2054 // Does folded match the buffer
2055 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2056 if (!characterMatches)
2057 break;
2058 posIndexDocument += widthChar;
2059 indexSearch += lenFlat;
2060 if (indexSearch >= lenSearch)
2061 break;
2063 if (characterMatches && (indexSearch == lenSearch)) {
2064 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
2065 *length = posIndexDocument - pos;
2066 return pos;
2069 if (forward) {
2070 pos += widthFirstCharacter;
2071 } else {
2072 if (!NextCharacter(pos, increment))
2073 break;
2076 } else if (dbcsCodePage) {
2077 const size_t maxBytesCharacter = 2;
2078 const size_t maxFoldingExpansion = 4;
2079 std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
2080 const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2081 while (forward ? (pos < endPos) : (pos >= endPos)) {
2082 Sci::Position indexDocument = 0;
2083 size_t indexSearch = 0;
2084 bool characterMatches = true;
2085 while (characterMatches &&
2086 ((pos + indexDocument) < limitPos) &&
2087 (indexSearch < lenSearch)) {
2088 char bytes[maxBytesCharacter + 1];
2089 bytes[0] = cb.CharAt(pos + indexDocument);
2090 const Sci::Position widthChar = IsDBCSLeadByteNoExcept(bytes[0]) ? 2 : 1;
2091 if (widthChar == 2)
2092 bytes[1] = cb.CharAt(pos + indexDocument + 1);
2093 if ((pos + indexDocument + widthChar) > limitPos)
2094 break;
2095 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
2096 const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2097 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2098 assert((indexSearch + lenFlat) <= searchThing.size());
2099 // Does folded match the buffer
2100 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2101 indexDocument += widthChar;
2102 indexSearch += lenFlat;
2104 if (characterMatches && (indexSearch == lenSearch)) {
2105 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
2106 *length = indexDocument;
2107 return pos;
2110 if (!NextCharacter(pos, increment))
2111 break;
2113 } else {
2114 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2115 std::vector<char> searchThing(lengthFind + 1);
2116 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2117 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2118 bool found = (pos + lengthFind) <= limitPos;
2119 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
2120 const char ch = CharAt(pos + indexSearch);
2121 char folded[2];
2122 pcf->Fold(folded, sizeof(folded), &ch, 1);
2123 found = folded[0] == searchThing[indexSearch];
2125 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2126 return pos;
2128 if (!NextCharacter(pos, increment))
2129 break;
2133 //Platform::DebugPrintf("Not found\n");
2134 return -1;
2137 const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) {
2138 if (regex)
2139 return regex->SubstituteByPosition(this, text, length);
2140 else
2141 return nullptr;
2144 int Document::LineCharacterIndex() const {
2145 return cb.LineCharacterIndex();
2148 void Document::AllocateLineCharacterIndex(int lineCharacterIndex) {
2149 return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2152 void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) {
2153 return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2156 Sci::Line Document::LinesTotal() const noexcept {
2157 return cb.Lines();
2160 void Document::SetDefaultCharClasses(bool includeWordClass) {
2161 charClass.SetDefaultCharClasses(includeWordClass);
2164 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
2165 charClass.SetCharClasses(chars, newCharClass);
2168 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) const {
2169 return charClass.GetCharsOfClass(characterClass, buffer);
2172 void Document::SetCharacterCategoryOptimization(int countCharacters) {
2173 charMap.Optimize(countCharacters);
2176 int Document::CharacterCategoryOptimization() const noexcept {
2177 return charMap.Size();
2180 void SCI_METHOD Document::StartStyling(Sci_Position position, char) {
2181 endStyled = position;
2184 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
2185 if (enteredStyling != 0) {
2186 return false;
2187 } else {
2188 enteredStyling++;
2189 const Sci::Position prevEndStyled = endStyled;
2190 if (cb.SetStyleFor(endStyled, length, style)) {
2191 const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
2192 prevEndStyled, length);
2193 NotifyModified(mh);
2195 endStyled += length;
2196 enteredStyling--;
2197 return true;
2201 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2202 if (enteredStyling != 0) {
2203 return false;
2204 } else {
2205 enteredStyling++;
2206 bool didChange = false;
2207 Sci::Position startMod = 0;
2208 Sci::Position endMod = 0;
2209 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
2210 PLATFORM_ASSERT(endStyled < Length());
2211 if (cb.SetStyleAt(endStyled, styles[iPos])) {
2212 if (!didChange) {
2213 startMod = endStyled;
2215 didChange = true;
2216 endMod = endStyled;
2219 if (didChange) {
2220 const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
2221 startMod, endMod - startMod + 1);
2222 NotifyModified(mh);
2224 enteredStyling--;
2225 return true;
2229 void Document::EnsureStyledTo(Sci::Position pos) {
2230 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
2231 IncrementStyleClock();
2232 if (pli && !pli->UseContainerLexing()) {
2233 const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled());
2234 const Sci::Position endStyledTo = LineStart(lineEndStyled);
2235 pli->Colourise(endStyledTo, pos);
2236 } else {
2237 // Ask the watchers to style, and stop as soon as one responds.
2238 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2239 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2240 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
2246 void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
2247 const Sci::Line lineFirst = SciLineFromPosition(GetEndStyled());
2248 ElapsedPeriod epStyling;
2249 EnsureStyledTo(pos);
2250 const Sci::Line lineLast = SciLineFromPosition(GetEndStyled());
2251 durationStyleOneLine.AddSample(lineLast - lineFirst, epStyling.Duration());
2254 void Document::LexerChanged() {
2255 // Tell the watchers the lexer has changed.
2256 for (const WatcherWithUserData &watcher : watchers) {
2257 watcher.watcher->NotifyLexerChanged(this, watcher.userData);
2261 LexInterface *Document::GetLexInterface() const {
2262 return pli.get();
2265 void Document::SetLexInterface(LexInterface *pLexInterface) {
2266 pli.reset(pLexInterface);
2269 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
2270 const int statePrevious = States()->SetLineState(static_cast<Sci::Line>(line), state);
2271 if (state != statePrevious) {
2272 const DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, nullptr,
2273 static_cast<Sci::Line>(line));
2274 NotifyModified(mh);
2276 return statePrevious;
2279 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
2280 return States()->GetLineState(static_cast<Sci::Line>(line));
2283 Sci::Line Document::GetMaxLineState() const {
2284 return States()->GetMaxLineState();
2287 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
2288 const DocModification mh(SC_MOD_LEXERSTATE, start,
2289 end-start, 0, 0, 0);
2290 NotifyModified(mh);
2293 StyledText Document::MarginStyledText(Sci::Line line) const {
2294 const LineAnnotation *pla = Margins();
2295 return StyledText(pla->Length(line), pla->Text(line),
2296 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2299 void Document::MarginSetText(Sci::Line line, const char *text) {
2300 Margins()->SetText(line, text);
2301 const DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line),
2302 0, 0, 0, line);
2303 NotifyModified(mh);
2306 void Document::MarginSetStyle(Sci::Line line, int style) {
2307 Margins()->SetStyle(line, style);
2308 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line),
2309 0, 0, 0, line));
2312 void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2313 Margins()->SetStyles(line, styles);
2314 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line),
2315 0, 0, 0, line));
2318 void Document::MarginClearAll() {
2319 const Sci::Line maxEditorLine = LinesTotal();
2320 for (Sci::Line l=0; l<maxEditorLine; l++)
2321 MarginSetText(l, nullptr);
2322 // Free remaining data
2323 Margins()->ClearAll();
2326 StyledText Document::AnnotationStyledText(Sci::Line line) const {
2327 const LineAnnotation *pla = Annotations();
2328 return StyledText(pla->Length(line), pla->Text(line),
2329 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2332 void Document::AnnotationSetText(Sci::Line line, const char *text) {
2333 if (line >= 0 && line < LinesTotal()) {
2334 const Sci::Line linesBefore = AnnotationLines(line);
2335 Annotations()->SetText(line, text);
2336 const int linesAfter = AnnotationLines(line);
2337 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line),
2338 0, 0, 0, line);
2339 mh.annotationLinesAdded = linesAfter - linesBefore;
2340 NotifyModified(mh);
2344 void Document::AnnotationSetStyle(Sci::Line line, int style) {
2345 Annotations()->SetStyle(line, style);
2346 const DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line),
2347 0, 0, 0, line);
2348 NotifyModified(mh);
2351 void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2352 if (line >= 0 && line < LinesTotal()) {
2353 Annotations()->SetStyles(line, styles);
2357 int Document::AnnotationLines(Sci::Line line) const {
2358 return Annotations()->Lines(line);
2361 void Document::AnnotationClearAll() {
2362 const Sci::Line maxEditorLine = LinesTotal();
2363 for (Sci::Line l=0; l<maxEditorLine; l++)
2364 AnnotationSetText(l, nullptr);
2365 // Free remaining data
2366 Annotations()->ClearAll();
2369 void Document::IncrementStyleClock() noexcept {
2370 styleClock = (styleClock + 1) % 0x100000;
2373 void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
2374 decorations->SetCurrentIndicator(indicator);
2377 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2378 const FillResult<Sci::Position> fr = decorations->FillRange(
2379 position, value, fillLength);
2380 if (fr.changed) {
2381 const DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2382 fr.position, fr.fillLength);
2383 NotifyModified(mh);
2387 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2388 const WatcherWithUserData wwud(watcher, userData);
2389 std::vector<WatcherWithUserData>::iterator it =
2390 std::find(watchers.begin(), watchers.end(), wwud);
2391 if (it != watchers.end())
2392 return false;
2393 watchers.push_back(wwud);
2394 return true;
2397 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2398 std::vector<WatcherWithUserData>::iterator it =
2399 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2400 if (it != watchers.end()) {
2401 watchers.erase(it);
2402 return true;
2404 return false;
2407 void Document::NotifyModifyAttempt() {
2408 for (const WatcherWithUserData &watcher : watchers) {
2409 watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2413 void Document::NotifySavePoint(bool atSavePoint) {
2414 for (const WatcherWithUserData &watcher : watchers) {
2415 watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2419 void Document::NotifyModified(DocModification mh) {
2420 if (mh.modificationType & SC_MOD_INSERTTEXT) {
2421 decorations->InsertSpace(mh.position, mh.length);
2422 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2423 decorations->DeleteRange(mh.position, mh.length);
2425 for (const WatcherWithUserData &watcher : watchers) {
2426 watcher.watcher->NotifyModified(this, mh, watcher.userData);
// Used for word part navigation.
// True for exactly the ASCII punctuation characters, which occupy four
// contiguous code ranges: '!'..'/', ':'..'@', '['..'`' and '{'..'~'.
static bool IsASCIIPunctuationCharacter(unsigned int ch) noexcept {
	const bool bangToSlash = (ch >= 0x21u) && (ch <= 0x2Fu);	// ! " # $ % & ' ( ) * + , - . /
	const bool colonToAt = (ch >= 0x3Au) && (ch <= 0x40u);	// : ; < = > ? @
	const bool bracketToBacktick = (ch >= 0x5Bu) && (ch <= 0x60u);	// [ \ ] ^ _ `
	const bool braceToTilde = (ch >= 0x7Bu) && (ch <= 0x7Eu);	// { | } ~
	return bangToSlash || colonToAt || bracketToBacktick || braceToTilde;
}
2471 bool Document::IsWordPartSeparator(unsigned int ch) const {
2472 return (WordCharacterClass(ch) == CharClassify::ccWord) && IsASCIIPunctuationCharacter(ch);
2475 Sci::Position Document::WordPartLeft(Sci::Position pos) const {
2476 if (pos > 0) {
2477 pos -= CharacterBefore(pos).widthBytes;
2478 CharacterExtracted ceStart = CharacterAfter(pos);
2479 if (IsWordPartSeparator(ceStart.character)) {
2480 while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) {
2481 pos -= CharacterBefore(pos).widthBytes;
2484 if (pos > 0) {
2485 ceStart = CharacterAfter(pos);
2486 pos -= CharacterBefore(pos).widthBytes;
2487 if (IsLowerCase(ceStart.character)) {
2488 while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
2489 pos -= CharacterBefore(pos).widthBytes;
2490 if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
2491 pos += CharacterAfter(pos).widthBytes;
2492 } else if (IsUpperCase(ceStart.character)) {
2493 while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
2494 pos -= CharacterBefore(pos).widthBytes;
2495 if (!IsUpperCase(CharacterAfter(pos).character))
2496 pos += CharacterAfter(pos).widthBytes;
2497 } else if (IsADigit(ceStart.character)) {
2498 while (pos > 0 && IsADigit(CharacterAfter(pos).character))
2499 pos -= CharacterBefore(pos).widthBytes;
2500 if (!IsADigit(CharacterAfter(pos).character))
2501 pos += CharacterAfter(pos).widthBytes;
2502 } else if (IsASCIIPunctuationCharacter(ceStart.character)) {
2503 while (pos > 0 && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
2504 pos -= CharacterBefore(pos).widthBytes;
2505 if (!IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
2506 pos += CharacterAfter(pos).widthBytes;
2507 } else if (isspacechar(ceStart.character)) {
2508 while (pos > 0 && isspacechar(CharacterAfter(pos).character))
2509 pos -= CharacterBefore(pos).widthBytes;
2510 if (!isspacechar(CharacterAfter(pos).character))
2511 pos += CharacterAfter(pos).widthBytes;
2512 } else if (!IsASCII(ceStart.character)) {
2513 while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
2514 pos -= CharacterBefore(pos).widthBytes;
2515 if (IsASCII(CharacterAfter(pos).character))
2516 pos += CharacterAfter(pos).widthBytes;
2517 } else {
2518 pos += CharacterAfter(pos).widthBytes;
2522 return pos;
2525 Sci::Position Document::WordPartRight(Sci::Position pos) const {
2526 CharacterExtracted ceStart = CharacterAfter(pos);
2527 const Sci::Position length = Length();
2528 if (IsWordPartSeparator(ceStart.character)) {
2529 while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
2530 pos += CharacterAfter(pos).widthBytes;
2531 ceStart = CharacterAfter(pos);
2533 if (!IsASCII(ceStart.character)) {
2534 while (pos < length && !IsASCII(CharacterAfter(pos).character))
2535 pos += CharacterAfter(pos).widthBytes;
2536 } else if (IsLowerCase(ceStart.character)) {
2537 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2538 pos += CharacterAfter(pos).widthBytes;
2539 } else if (IsUpperCase(ceStart.character)) {
2540 if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
2541 pos += CharacterAfter(pos).widthBytes;
2542 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2543 pos += CharacterAfter(pos).widthBytes;
2544 } else {
2545 while (pos < length && IsUpperCase(CharacterAfter(pos).character))
2546 pos += CharacterAfter(pos).widthBytes;
2548 if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
2549 pos -= CharacterBefore(pos).widthBytes;
2550 } else if (IsADigit(ceStart.character)) {
2551 while (pos < length && IsADigit(CharacterAfter(pos).character))
2552 pos += CharacterAfter(pos).widthBytes;
2553 } else if (IsASCIIPunctuationCharacter(ceStart.character)) {
2554 while (pos < length && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
2555 pos += CharacterAfter(pos).widthBytes;
2556 } else if (isspacechar(ceStart.character)) {
2557 while (pos < length && isspacechar(CharacterAfter(pos).character))
2558 pos += CharacterAfter(pos).widthBytes;
2559 } else {
2560 pos += CharacterAfter(pos).widthBytes;
2562 return pos;
// True when c is a carriage return or a line feed.
static constexpr bool IsLineEndChar(char c) noexcept {
	return (c == '\r') ? true : (c == '\n');
}
2569 Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) {
2570 const int sStart = cb.StyleAt(pos);
2571 if (delta < 0) {
2572 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2573 pos--;
2574 pos++;
2575 } else {
2576 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2577 pos++;
2579 return pos;
// Return the brace that pairs with ch for the four supported brace kinds
// ( ) [ ] { } < >, or '\0' when ch is not one of those characters.
static char BraceOpposite(char ch) noexcept {
	// Partners sit next to each other, so the partner of the brace at index i
	// is at index i with its lowest bit flipped.
	static const char bracePairs[] = "()[]{}<>";
	for (size_t i = 0; bracePairs[i] != '\0'; i++) {
		if (bracePairs[i] == ch) {
			return bracePairs[i ^ 1];
		}
	}
	return '\0';
}
2605 // TODO: should be able to extend styled region to find matching brace
2606 Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/) {
2607 const char chBrace = CharAt(position);
2608 const char chSeek = BraceOpposite(chBrace);
2609 if (chSeek == '\0')
2610 return - 1;
2611 const int styBrace = StyleIndexAt(position);
2612 int direction = -1;
2613 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2614 direction = 1;
2615 int depth = 1;
2616 position = NextPosition(position, direction);
2617 while ((position >= 0) && (position < Length())) {
2618 const char chAtPos = CharAt(position);
2619 const int styAtPos = StyleIndexAt(position);
2620 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2621 if (chAtPos == chBrace)
2622 depth++;
2623 if (chAtPos == chSeek)
2624 depth--;
2625 if (depth == 0)
2626 return position;
2628 const Sci::Position positionBeforeMove = position;
2629 position = NextPosition(position, direction);
2630 if (position == positionBeforeMove)
2631 break;
2633 return - 1;
2637 * Implementation of RegexSearchBase for the default built-in regular expression engine
2639 class BuiltinRegex : public RegexSearchBase {
2640 public:
2641 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2642 BuiltinRegex(const BuiltinRegex &) = delete;
2643 BuiltinRegex(BuiltinRegex &&) = delete;
2644 BuiltinRegex &operator=(const BuiltinRegex &) = delete;
2645 BuiltinRegex &operator=(BuiltinRegex &&) = delete;
2646 ~BuiltinRegex() override = default;
2648 Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
2649 bool caseSensitive, bool word, bool wordStart, int flags,
2650 Sci::Position *length) override;
2652 const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;
2654 private:
2655 RESearch search;
2656 std::string substituted;
2659 namespace {
2662 * RESearchRange keeps track of search range.
2664 class RESearchRange {
2665 public:
2666 const Document *doc;
2667 int increment;
2668 Sci::Position startPos;
2669 Sci::Position endPos;
2670 Sci::Line lineRangeStart;
2671 Sci::Line lineRangeEnd;
2672 Sci::Line lineRangeBreak;
2673 RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) : doc(doc_) {
2674 increment = (minPos <= maxPos) ? 1 : -1;
2676 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2677 // but just in case, move them.
2678 startPos = doc->MovePositionOutsideChar(minPos, 1, true);
2679 endPos = doc->MovePositionOutsideChar(maxPos, 1, true);
2681 lineRangeStart = doc->SciLineFromPosition(startPos);
2682 lineRangeEnd = doc->SciLineFromPosition(endPos);
2683 lineRangeBreak = lineRangeEnd + increment;
2685 Range LineRange(Sci::Line line) const {
2686 Range range(doc->LineStart(line), doc->LineEnd(line));
2687 if (increment == 1) {
2688 if (line == lineRangeStart)
2689 range.start = startPos;
2690 if (line == lineRangeEnd)
2691 range.end = endPos;
2692 } else {
2693 if (line == lineRangeEnd)
2694 range.start = endPos;
2695 if (line == lineRangeStart)
2696 range.end = startPos;
2698 return range;
2702 // Define a way for the Regular Expression code to access the document
2703 class DocumentIndexer : public CharacterIndexer {
2704 Document *pdoc;
2705 Sci::Position end;
2706 public:
2707 DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept :
2708 pdoc(pdoc_), end(end_) {
2711 DocumentIndexer(const DocumentIndexer &) = delete;
2712 DocumentIndexer(DocumentIndexer &&) = delete;
2713 DocumentIndexer &operator=(const DocumentIndexer &) = delete;
2714 DocumentIndexer &operator=(DocumentIndexer &&) = delete;
2716 ~DocumentIndexer() override = default;
2718 char CharAt(Sci::Position index) const noexcept override {
2719 if (index < 0 || index >= end)
2720 return 0;
2721 else
2722 return pdoc->CharAt(index);
2726 #ifndef NO_CXX11_REGEX
2728 class ByteIterator {
2729 public:
2730 typedef std::bidirectional_iterator_tag iterator_category;
2731 typedef char value_type;
2732 typedef ptrdiff_t difference_type;
2733 typedef char* pointer;
2734 typedef char& reference;
2736 const Document *doc;
2737 Sci::Position position;
2739 ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2740 doc(doc_), position(position_) {
2742 ByteIterator(const ByteIterator &other) noexcept {
2743 doc = other.doc;
2744 position = other.position;
2746 ByteIterator(ByteIterator &&other) noexcept {
2747 doc = other.doc;
2748 position = other.position;
2750 ByteIterator &operator=(const ByteIterator &other) noexcept {
2751 if (this != &other) {
2752 doc = other.doc;
2753 position = other.position;
2755 return *this;
2757 ByteIterator &operator=(ByteIterator &&) noexcept = default;
2758 ~ByteIterator() = default;
2759 char operator*() const noexcept {
2760 return doc->CharAt(position);
2762 ByteIterator &operator++() noexcept {
2763 position++;
2764 return *this;
2766 ByteIterator operator++(int) noexcept {
2767 ByteIterator retVal(*this);
2768 position++;
2769 return retVal;
2771 ByteIterator &operator--() noexcept {
2772 position--;
2773 return *this;
2775 bool operator==(const ByteIterator &other) const noexcept {
2776 return doc == other.doc && position == other.position;
2778 bool operator!=(const ByteIterator &other) const noexcept {
2779 return doc != other.doc || position != other.position;
2781 Sci::Position Pos() const noexcept {
2782 return position;
2784 Sci::Position PosRoundUp() const noexcept {
2785 return position;
// On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
// Would be better to use sizeof(wchar_t) or similar to differentiate
// but easier for now to hard-code platforms.
// C++11 has char16_t and char32_t but neither Clang nor Visual C++
// appear to allow specializing basic_regex over these.

// WCHAR_T_IS_16 is 1 when building for Windows (_WIN32 defined) and 0 otherwise.
#if defined(_WIN32)
#define WCHAR_T_IS_16 1
#else
#define WCHAR_T_IS_16 0
#endif
2801 #if WCHAR_T_IS_16
2803 // On Windows, report non-BMP characters as 2 separate surrogates as that
2804 // matches wregex since it is based on wchar_t.
2805 class UTF8Iterator {
2806 // These 3 fields determine the iterator position and are used for comparisons
2807 const Document *doc;
2808 Sci::Position position;
2809 size_t characterIndex;
2810 // Remaining fields are derived from the determining fields so are excluded in comparisons
2811 unsigned int lenBytes;
2812 size_t lenCharacters;
2813 wchar_t buffered[2];
2814 public:
2815 typedef std::bidirectional_iterator_tag iterator_category;
2816 typedef wchar_t value_type;
2817 typedef ptrdiff_t difference_type;
2818 typedef wchar_t* pointer;
2819 typedef wchar_t& reference;
2821 UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2822 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
2823 buffered[0] = 0;
2824 buffered[1] = 0;
2825 if (doc) {
2826 ReadCharacter();
2829 UTF8Iterator(const UTF8Iterator &other) noexcept : buffered{} {
2830 doc = other.doc;
2831 position = other.position;
2832 characterIndex = other.characterIndex;
2833 lenBytes = other.lenBytes;
2834 lenCharacters = other.lenCharacters;
2835 buffered[0] = other.buffered[0];
2836 buffered[1] = other.buffered[1];
2838 UTF8Iterator(UTF8Iterator &&other) noexcept = default;
2839 UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
2840 if (this != &other) {
2841 doc = other.doc;
2842 position = other.position;
2843 characterIndex = other.characterIndex;
2844 lenBytes = other.lenBytes;
2845 lenCharacters = other.lenCharacters;
2846 buffered[0] = other.buffered[0];
2847 buffered[1] = other.buffered[1];
2849 return *this;
2851 UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
2852 ~UTF8Iterator() = default;
2853 wchar_t operator*() const noexcept {
2854 assert(lenCharacters != 0);
2855 return buffered[characterIndex];
2857 UTF8Iterator &operator++() noexcept {
2858 if ((characterIndex + 1) < (lenCharacters)) {
2859 characterIndex++;
2860 } else {
2861 position += lenBytes;
2862 ReadCharacter();
2863 characterIndex = 0;
2865 return *this;
2867 UTF8Iterator operator++(int) noexcept {
2868 UTF8Iterator retVal(*this);
2869 if ((characterIndex + 1) < (lenCharacters)) {
2870 characterIndex++;
2871 } else {
2872 position += lenBytes;
2873 ReadCharacter();
2874 characterIndex = 0;
2876 return retVal;
2878 UTF8Iterator &operator--() noexcept {
2879 if (characterIndex) {
2880 characterIndex--;
2881 } else {
2882 position = doc->NextPosition(position, -1);
2883 ReadCharacter();
2884 characterIndex = lenCharacters - 1;
2886 return *this;
2888 bool operator==(const UTF8Iterator &other) const noexcept {
2889 // Only test the determining fields, not the character widths and values derived from this
2890 return doc == other.doc &&
2891 position == other.position &&
2892 characterIndex == other.characterIndex;
2894 bool operator!=(const UTF8Iterator &other) const noexcept {
2895 // Only test the determining fields, not the character widths and values derived from this
2896 return doc != other.doc ||
2897 position != other.position ||
2898 characterIndex != other.characterIndex;
2900 Sci::Position Pos() const noexcept {
2901 return position;
2903 Sci::Position PosRoundUp() const noexcept {
2904 if (characterIndex)
2905 return position + lenBytes; // Force to end of character
2906 else
2907 return position;
2909 private:
2910 void ReadCharacter() noexcept {
2911 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2912 lenBytes = charExtracted.widthBytes;
2913 if (charExtracted.character == unicodeReplacementChar) {
2914 lenCharacters = 1;
2915 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2916 } else {
2917 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2922 #else
2924 // On Unix, report non-BMP characters as single characters
2926 class UTF8Iterator {
2927 const Document *doc;
2928 Sci::Position position;
2929 public:
2930 typedef std::bidirectional_iterator_tag iterator_category;
2931 typedef wchar_t value_type;
2932 typedef ptrdiff_t difference_type;
2933 typedef wchar_t* pointer;
2934 typedef wchar_t& reference;
2936 UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2937 doc(doc_), position(position_) {
2939 UTF8Iterator(const UTF8Iterator &other) noexcept {
2940 doc = other.doc;
2941 position = other.position;
2943 UTF8Iterator(UTF8Iterator &&other) noexcept = default;
2944 UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
2945 if (this != &other) {
2946 doc = other.doc;
2947 position = other.position;
2949 return *this;
2951 UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
2952 ~UTF8Iterator() = default;
2953 wchar_t operator*() const noexcept {
2954 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2955 return charExtracted.character;
2957 UTF8Iterator &operator++() noexcept {
2958 position = doc->NextPosition(position, 1);
2959 return *this;
2961 UTF8Iterator operator++(int) noexcept {
2962 UTF8Iterator retVal(*this);
2963 position = doc->NextPosition(position, 1);
2964 return retVal;
2966 UTF8Iterator &operator--() noexcept {
2967 position = doc->NextPosition(position, -1);
2968 return *this;
2970 bool operator==(const UTF8Iterator &other) const noexcept {
2971 return doc == other.doc && position == other.position;
2973 bool operator!=(const UTF8Iterator &other) const noexcept {
2974 return doc != other.doc || position != other.position;
2976 Sci::Position Pos() const noexcept {
2977 return position;
2979 Sci::Position PosRoundUp() const noexcept {
2980 return position;
2984 #endif
2986 std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) {
2987 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2988 if (!doc->IsLineStartPosition(startPos))
2989 flagsMatch |= std::regex_constants::match_not_bol;
2990 if (!doc->IsLineEndPosition(endPos))
2991 flagsMatch |= std::regex_constants::match_not_eol;
2992 return flagsMatch;
2995 template<typename Iterator, typename Regex>
2996 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2997 std::match_results<Iterator> match;
2999 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3000 // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3001 // The std::regex::multiline option was added to C++17 to improve behaviour but
3002 // has not been implemented by compiler runtimes with MSVC always in multiline
3003 // mode and libc++ and libstdc++ always in single-line mode.
3004 // If multiline regex worked well then the line by line iteration could be removed
3005 // for the forwards case and replaced with the following 4 lines:
3006 #ifdef REGEX_MULTILINE
3007 Iterator itStart(doc, resr.startPos);
3008 Iterator itEnd(doc, resr.endPos);
3009 const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos);
3010 const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3011 #else
3012 // Line by line.
3013 bool matched = false;
3014 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3015 const Range lineRange = resr.LineRange(line);
3016 Iterator itStart(doc, lineRange.start);
3017 Iterator itEnd(doc, lineRange.end);
3018 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
3019 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3020 // Check for the last match on this line.
3021 if (matched) {
3022 if (resr.increment == -1) {
3023 while (matched) {
3024 Iterator itNext(doc, match[0].second.PosRoundUp());
3025 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
3026 std::match_results<Iterator> matchNext;
3027 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
3028 if (matched) {
3029 if (match[0].first == match[0].second) {
3030 // Empty match means failure so exit
3031 return false;
3033 match = matchNext;
3036 matched = true;
3038 break;
3041 #endif
3042 if (matched) {
3043 for (size_t co = 0; co < match.size(); co++) {
3044 search.bopat[co] = match[co].first.Pos();
3045 search.eopat[co] = match[co].second.PosRoundUp();
3046 const Sci::Position lenMatch = search.eopat[co] - search.bopat[co];
3047 search.pat[co].resize(lenMatch);
3048 for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) {
3049 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
3053 return matched;
3056 Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3057 bool caseSensitive, Sci::Position *length, RESearch &search) {
3058 const RESearchRange resr(doc, minPos, maxPos);
3059 try {
3060 //ElapsedPeriod ep;
3061 std::regex::flag_type flagsRe = std::regex::ECMAScript;
3062 // Flags that apper to have no effect:
3063 // | std::regex::collate | std::regex::extended;
3064 if (!caseSensitive)
3065 flagsRe = flagsRe | std::regex::icase;
3067 // Clear the RESearch so can fill in matches
3068 search.Clear();
3070 bool matched = false;
3071 if (SC_CP_UTF8 == doc->dbcsCodePage) {
3072 const std::wstring ws = WStringFromUTF8(s, strlen(s));
3073 std::wregex regexp;
3074 regexp.assign(ws, flagsRe);
3075 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3077 } else {
3078 std::regex regexp;
3079 regexp.assign(s, flagsRe);
3080 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3083 Sci::Position posMatch = -1;
3084 if (matched) {
3085 posMatch = search.bopat[0];
3086 *length = search.eopat[0] - search.bopat[0];
3088 // Example - search in doc/ScintillaHistory.html for
3089 // [[:upper:]]eta[[:space:]]
3090 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3091 //const double durSearch = ep.Duration(true);
3092 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3093 return posMatch;
3094 } catch (std::regex_error &) {
3095 // Failed to create regular expression
3096 throw RegexError();
3097 } catch (...) {
3098 // Failed in some other way
3099 return -1;
3103 #endif
3107 Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3108 bool caseSensitive, bool, bool, int flags,
3109 Sci::Position *length) {
3111 #ifndef NO_CXX11_REGEX
3112 if (flags & SCFIND_CXX11REGEX) {
3113 return Cxx11RegexFindText(doc, minPos, maxPos, s,
3114 caseSensitive, length, search);
3116 #endif
3118 const RESearchRange resr(doc, minPos, maxPos);
3120 const bool posix = (flags & SCFIND_POSIX) != 0;
3122 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
3123 if (errmsg) {
3124 return -1;
3126 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3127 // Replace first '.' with '-' in each property file variable reference:
3128 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3129 // Replace: $(\1-\2)
3130 Sci::Position pos = -1;
3131 Sci::Position lenRet = 0;
3132 const bool searchforLineStart = s[0] == '^';
3133 const char searchEnd = s[*length - 1];
3134 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
3135 const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
3136 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3137 Sci::Position startOfLine = doc->LineStart(line);
3138 Sci::Position endOfLine = doc->LineEnd(line);
3139 if (resr.increment == 1) {
3140 if (line == resr.lineRangeStart) {
3141 if ((resr.startPos != startOfLine) && searchforLineStart)
3142 continue; // Can't match start of line if start position after start of line
3143 startOfLine = resr.startPos;
3145 if (line == resr.lineRangeEnd) {
3146 if ((resr.endPos != endOfLine) && searchforLineEnd)
3147 continue; // Can't match end of line if end position before end of line
3148 endOfLine = resr.endPos;
3150 } else {
3151 if (line == resr.lineRangeEnd) {
3152 if ((resr.endPos != startOfLine) && searchforLineStart)
3153 continue; // Can't match start of line if end position after start of line
3154 startOfLine = resr.endPos;
3156 if (line == resr.lineRangeStart) {
3157 if ((resr.startPos != endOfLine) && searchforLineEnd)
3158 continue; // Can't match end of line if start position before end of line
3159 endOfLine = resr.startPos;
3163 const DocumentIndexer di(doc, endOfLine);
3164 int success = search.Execute(di, startOfLine, endOfLine);
3165 if (success) {
3166 pos = search.bopat[0];
3167 // Ensure only whole characters selected
3168 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
3169 lenRet = search.eopat[0] - search.bopat[0];
3170 // There can be only one start of a line, so no need to look for last match in line
3171 if ((resr.increment == -1) && !searchforLineStart) {
3172 // Check for the last match on this line.
3173 int repetitions = 1000; // Break out of infinite loop
3174 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
3175 success = search.Execute(di, pos+1, endOfLine);
3176 if (success) {
3177 if (search.eopat[0] <= minPos) {
3178 pos = search.bopat[0];
3179 lenRet = search.eopat[0] - search.bopat[0];
3180 } else {
3181 success = 0;
3186 break;
3189 *length = lenRet;
3190 return pos;
3193 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
3194 substituted.clear();
3195 const DocumentIndexer di(doc, doc->Length());
3196 search.GrabMatches(di);
3197 for (Sci::Position j = 0; j < *length; j++) {
3198 if (text[j] == '\\') {
3199 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
3200 const unsigned int patNum = text[j + 1] - '0';
3201 const Sci::Position len = search.eopat[patNum] - search.bopat[patNum];
3202 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
3203 substituted.append(search.pat[patNum].c_str(), len);
3204 j++;
3205 } else {
3206 j++;
3207 switch (text[j]) {
3208 case 'a':
3209 substituted.push_back('\a');
3210 break;
3211 case 'b':
3212 substituted.push_back('\b');
3213 break;
3214 case 'f':
3215 substituted.push_back('\f');
3216 break;
3217 case 'n':
3218 substituted.push_back('\n');
3219 break;
3220 case 'r':
3221 substituted.push_back('\r');
3222 break;
3223 case 't':
3224 substituted.push_back('\t');
3225 break;
3226 case 'v':
3227 substituted.push_back('\v');
3228 break;
3229 case '\\':
3230 substituted.push_back('\\');
3231 break;
3232 default:
3233 substituted.push_back('\\');
3234 j--;
3237 } else {
3238 substituted.push_back(text[j]);
3241 *length = substituted.length();
3242 return substituted.c_str();
3245 #ifndef SCI_OWNREGEX
3247 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
3248 return new BuiltinRegex(charClassTable);
3251 #endif