No need to set variable to NULL
[TortoiseGit.git] / ext / scintilla / src / Document.cxx
blobeb52b1b378f972f8a08e0ba1ff36e3688be9e664
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <cstddef>
9 #include <cstdlib>
10 #include <cassert>
11 #include <cstring>
12 #include <cstdio>
13 #include <cmath>
15 #include <stdexcept>
16 #include <string>
17 #include <string_view>
18 #include <vector>
19 #include <array>
20 #include <forward_list>
21 #include <optional>
22 #include <algorithm>
23 #include <memory>
24 #include <chrono>
26 #ifndef NO_CXX11_REGEX
27 #include <regex>
28 #endif
30 #include "ScintillaTypes.h"
31 #include "ILoader.h"
32 #include "ILexer.h"
34 #include "Debugging.h"
36 #include "CharacterType.h"
37 #include "CharacterCategoryMap.h"
38 #include "Position.h"
39 #include "SplitVector.h"
40 #include "Partitioning.h"
41 #include "RunStyles.h"
42 #include "CellBuffer.h"
43 #include "PerLine.h"
44 #include "CharClassify.h"
45 #include "Decoration.h"
46 #include "CaseFolder.h"
47 #include "Document.h"
48 #include "RESearch.h"
49 #include "UniConversion.h"
50 #include "ElapsedPeriod.h"
52 using namespace Scintilla;
53 using namespace Scintilla::Internal;
55 #if defined(__GNUC__) && !defined(__clang__)
56 // False warnings from g++ 14.1 for UTF-8 accumulation code where UTF8MaxBytes allocated.
57 #pragma GCC diagnostic ignored "-Wstringop-overflow"
58 #endif
60 LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) {
63 LexInterface::~LexInterface() noexcept = default;
65 void LexInterface::SetInstance(ILexer5 *instance_) noexcept {
66 instance.reset(instance_);
69 void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
70 if (pdoc && instance && !performingStyle) {
71 // Protect against reentrance, which may occur, for example, when
72 // fold points are discovered while performing styling and the folding
73 // code looks for child lines which may trigger styling.
74 performingStyle = true;
76 const Sci::Position lengthDoc = pdoc->Length();
77 if (end == -1)
78 end = lengthDoc;
79 const Sci::Position len = end - start;
81 PLATFORM_ASSERT(len >= 0);
82 PLATFORM_ASSERT(start + len <= lengthDoc);
84 int styleStart = 0;
85 if (start > 0)
86 styleStart = pdoc->StyleAt(start - 1);
88 if (len > 0) {
89 instance->Lex(start, len, styleStart, pdoc);
90 instance->Fold(start, len, styleStart, pdoc);
93 performingStyle = false;
97 LineEndType LexInterface::LineEndTypesSupported() {
98 if (instance) {
99 return static_cast<LineEndType>(instance->LineEndTypesSupported());
101 return LineEndType::Default;
104 bool LexInterface::UseContainerLexing() const noexcept {
105 return !instance;
108 ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
109 duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
112 void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
113 // Only adjust for multiple actions to avoid instability
114 if (numberActions < 8)
115 return;
117 // Alpha value for exponential smoothing.
118 // Most recent value contributes 25% to smoothed value.
119 constexpr double alpha = 0.25;
121 const double durationOne = durationOfActions / numberActions;
122 duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration,
123 minDuration, maxDuration);
126 double ActionDuration::Duration() const noexcept {
127 return duration;
130 size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept {
131 return std::lround(secondsAllowed / Duration());
134 CharacterExtracted::CharacterExtracted(const unsigned char *charBytes, size_t widthCharBytes) noexcept {
135 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
136 if (utf8status & UTF8MaskInvalid) {
137 // Treat as invalid and use up just one byte
138 character = unicodeReplacementChar;
139 widthBytes = 1;
140 } else {
141 character = UnicodeFromUTF8(charBytes);
142 widthBytes = utf8status & UTF8MaskWidth;
146 Document::Document(DocumentOption options) :
147 cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),
148 durationStyleOneByte(0.000001, 0.0000001, 0.00001) {
149 refCount = 0;
150 #ifdef _WIN32
151 eolMode = EndOfLine::CrLf;
152 #else
153 eolMode = EndOfLine::Lf;
154 #endif
155 dbcsCodePage = CpUtf8;
156 lineEndBitSet = LineEndType::Default;
157 endStyled = 0;
158 styleClock = 0;
159 enteredModification = 0;
160 enteredStyling = 0;
161 enteredReadOnlyCount = 0;
162 insertionSet = false;
163 tabInChars = 8;
164 indentInChars = 0;
165 actualIndentInChars = 8;
166 useTabs = true;
167 tabIndents = true;
168 backspaceUnindents = false;
170 matchesValid = false;
172 perLineData[ldMarkers] = std::make_unique<LineMarkers>();
173 perLineData[ldLevels] = std::make_unique<LineLevels>();
174 perLineData[ldState] = std::make_unique<LineState>();
175 perLineData[ldMargin] = std::make_unique<LineAnnotation>();
176 perLineData[ldAnnotation] = std::make_unique<LineAnnotation>();
177 perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>();
179 decorations = DecorationListCreate(IsLarge());
181 cb.SetPerLine(this);
182 cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
185 Document::~Document() {
186 for (const WatcherWithUserData &watcher : watchers) {
187 watcher.watcher->NotifyDeleted(this, watcher.userData);
191 // Increase reference count and return its previous value.
192 int SCI_METHOD Document::AddRef() noexcept {
193 return refCount++;
196 // Decrease reference count and return its previous value.
197 // Delete the document if reference count reaches zero.
198 int SCI_METHOD Document::Release() {
199 const int curRefCount = --refCount;
200 if (curRefCount == 0)
201 delete this;
202 return curRefCount;
205 void Document::Init() {
206 for (const std::unique_ptr<PerLine> &pl : perLineData) {
207 if (pl)
208 pl->Init();
212 void Document::InsertLine(Sci::Line line) {
213 for (const std::unique_ptr<PerLine> &pl : perLineData) {
214 if (pl)
215 pl->InsertLine(line);
219 void Document::InsertLines(Sci::Line line, Sci::Line lines) {
220 for (const auto &pl : perLineData) {
221 if (pl)
222 pl->InsertLines(line, lines);
226 void Document::RemoveLine(Sci::Line line) {
227 for (const std::unique_ptr<PerLine> &pl : perLineData) {
228 if (pl)
229 pl->RemoveLine(line);
233 LineMarkers *Document::Markers() const noexcept {
234 return static_cast<LineMarkers *>(perLineData[ldMarkers].get());
237 LineLevels *Document::Levels() const noexcept {
238 return static_cast<LineLevels *>(perLineData[ldLevels].get());
241 LineState *Document::States() const noexcept {
242 return static_cast<LineState *>(perLineData[ldState].get());
245 LineAnnotation *Document::Margins() const noexcept {
246 return static_cast<LineAnnotation *>(perLineData[ldMargin].get());
249 LineAnnotation *Document::Annotations() const noexcept {
250 return static_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
253 LineAnnotation *Document::EOLAnnotations() const noexcept {
254 return static_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get());
257 LineEndType Document::LineEndTypesSupported() const {
258 if ((CpUtf8 == dbcsCodePage) && pli)
259 return pli->LineEndTypesSupported();
260 else
261 return LineEndType::Default;
264 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
265 if (dbcsCodePage != dbcsCodePage_) {
266 dbcsCodePage = dbcsCodePage_;
267 SetCaseFolder(nullptr);
268 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
269 cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
270 ModifiedAt(0); // Need to restyle whole document
271 return true;
272 } else {
273 return false;
277 bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) {
278 if (lineEndBitSet != lineEndBitSet_) {
279 lineEndBitSet = lineEndBitSet_;
280 const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
281 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
282 ModifiedAt(0);
283 cb.SetLineEndTypes(lineEndBitSetActive);
284 return true;
285 } else {
286 return false;
288 } else {
289 return false;
293 void Document::SetSavePoint() {
294 cb.SetSavePoint();
295 NotifySavePoint(true);
// Undo every step of the active tentative undo sequence, sending a notification
// before and after each step, and then commit the tentative state in the cell buffer.
// Returns immediately when no tentative sequence is active.
// The work is skipped when a modification is already in progress
// (enteredModification is non-zero) or the cell buffer is read-only.
298 void Document::TentativeUndo() {
299 if (!TentativeActive())
300 return;
301 CheckReadOnly();
302 if (enteredModification == 0) {
// Count this as a modification in progress until the matching decrement below.
303 enteredModification++;
304 if (!cb.IsReadOnly()) {
// Remember the save point state so a change can be reported after all steps.
305 const bool startSavePoint = cb.IsSavePoint();
306 bool multiLine = false;
307 const int steps = cb.TentativeSteps();
308 //Platform::DebugPrintf("Steps=%d\n", steps);
309 for (int step = 0; step < steps; step++) {
310 const Sci::Line prevLinesTotal = LinesTotal();
311 const Action action = cb.GetUndoStep();
// "Before" notification: undoing a removal re-inserts text, undoing anything
// else that is not a container action deletes text.
312 if (action.at == ActionType::remove) {
313 NotifyModified(DocModification(
314 ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
315 } else if (action.at == ActionType::container) {
316 DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
317 dm.token = action.position;
318 NotifyModified(dm);
319 } else {
320 NotifyModified(DocModification(
321 ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
323 cb.PerformUndoStep();
324 if (action.at != ActionType::container) {
325 ModifiedAt(action.position);
// Build the flags for the "after" notification of this step.
328 ModificationFlags modFlags = ModificationFlags::Undo;
329 // With undo, an insertion action becomes a deletion notification
330 if (action.at == ActionType::remove) {
331 modFlags |= ModificationFlags::InsertText;
332 } else if (action.at == ActionType::insert) {
333 modFlags |= ModificationFlags::DeleteText;
335 if (steps > 1)
336 modFlags |= ModificationFlags::MultiStepUndoRedo;
337 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
338 if (linesAdded != 0)
339 multiLine = true;
// The final step additionally reports whether any step changed the line count.
340 if (step == steps - 1) {
341 modFlags |= ModificationFlags::LastStepInUndoRedo;
342 if (multiLine)
343 modFlags |= ModificationFlags::MultilineUndoRedo;
345 NotifyModified(DocModification(modFlags, action.position, action.lenData,
346 linesAdded, action.data));
// Report a save point transition only when the state differs from the start.
349 const bool endSavePoint = cb.IsSavePoint();
350 if (startSavePoint != endSavePoint)
351 NotifySavePoint(endSavePoint);
353 cb.TentativeCommit();
355 enteredModification--;
359 int Document::UndoActions() const noexcept {
360 return cb.UndoActions();
363 void Document::SetUndoSavePoint(int action) noexcept {
364 cb.SetUndoSavePoint(action);
367 int Document::UndoSavePoint() const noexcept {
368 return cb.UndoSavePoint();
371 void Document::SetUndoDetach(int action) noexcept {
372 cb.SetUndoDetach(action);
375 int Document::UndoDetach() const noexcept {
376 return cb.UndoDetach();
379 void Document::SetUndoTentative(int action) noexcept {
380 cb.SetUndoTentative(action);
383 int Document::UndoTentative() const noexcept {
384 return cb.UndoTentative();
387 void Document::SetUndoCurrent(int action) {
388 cb.SetUndoCurrent(action);
391 int Document::UndoCurrent() const noexcept {
392 return cb.UndoCurrent();
395 int Document::UndoActionType(int action) const noexcept {
396 return cb.UndoActionType(action);
399 Sci::Position Document::UndoActionPosition(int action) const noexcept {
400 return cb.UndoActionPosition(action);
403 std::string_view Document::UndoActionText(int action) const noexcept {
404 return cb.UndoActionText(action);
407 void Document::PushUndoActionType(int type, Sci::Position position) {
408 cb.PushUndoActionType(type, position);
411 void Document::ChangeLastUndoActionText(size_t length, const char *text) {
412 cb.ChangeLastUndoActionText(length, text);
415 int Document::GetMark(Sci::Line line, bool includeChangeHistory) const {
416 int marksHistory = 0;
417 if (includeChangeHistory && (line < LinesTotal())) {
418 int marksEdition = 0;
420 const Sci::Position start = LineStart(line);
421 const Sci::Position lineNext = LineStart(line + 1);
422 for (Sci::Position position = start; position < lineNext;) {
423 const int edition = EditionAt(position);
424 if (edition) {
425 marksEdition |= 1 << (edition-1);
427 position = EditionEndRun(position);
429 const Sci::Position lineEnd = LineEnd(line);
430 for (Sci::Position position = start; position <= lineEnd;) {
431 marksEdition |= EditionDeletesAt(position);
432 position = EditionNextDelete(position);
435 /* Bits: RevertedToOrigin, Saved, Modified, RevertedToModified */
436 constexpr unsigned int editionShift = static_cast<unsigned int>(MarkerOutline::HistoryRevertedToOrigin);
437 marksHistory = marksEdition << editionShift;
440 return marksHistory | Markers()->MarkValue(line);
443 Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept {
444 return Markers()->MarkerNext(lineStart, mask);
447 int Document::AddMark(Sci::Line line, int markerNum) {
448 if (line >= 0 && line < LinesTotal()) {
449 const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
450 const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
451 NotifyModified(mh);
452 return prev;
453 } else {
454 return -1;
458 void Document::AddMarkSet(Sci::Line line, int valueSet) {
459 if (line < 0 || line >= LinesTotal()) {
460 return;
462 unsigned int m = valueSet;
463 for (int i = 0; m; i++, m >>= 1) {
464 if (m & 1)
465 Markers()->AddMark(line, i, LinesTotal());
467 const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
468 NotifyModified(mh);
471 void Document::DeleteMark(Sci::Line line, int markerNum) {
472 Markers()->DeleteMark(line, markerNum, false);
473 const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
474 NotifyModified(mh);
477 void Document::DeleteMarkFromHandle(int markerHandle) {
478 Markers()->DeleteMarkFromHandle(markerHandle);
479 DocModification mh(ModificationFlags::ChangeMarker);
480 mh.line = -1;
481 NotifyModified(mh);
484 void Document::DeleteAllMarks(int markerNum) {
485 bool someChanges = false;
486 for (Sci::Line line = 0; line < LinesTotal(); line++) {
487 if (Markers()->DeleteMark(line, markerNum, true))
488 someChanges = true;
490 if (someChanges) {
491 DocModification mh(ModificationFlags::ChangeMarker);
492 mh.line = -1;
493 NotifyModified(mh);
497 Sci::Line Document::LineFromHandle(int markerHandle) const noexcept {
498 return Markers()->LineFromHandle(markerHandle);
501 int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept {
502 return Markers()->NumberFromLine(line, which);
505 int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept {
506 return Markers()->HandleFromLine(line, which);
509 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
510 return cb.LineStart(line);
513 Range Document::LineRange(Sci::Line line) const noexcept {
514 return {cb.LineStart(line), cb.LineStart(line + 1)};
517 bool Document::IsLineStartPosition(Sci::Position position) const noexcept {
518 return LineStartPosition(position) == position;
521 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
522 return cb.LineEnd(line);
525 int SCI_METHOD Document::DEVersion() const noexcept {
526 return deRelease0;
529 void SCI_METHOD Document::SetErrorStatus(int status) {
530 // Tell the watchers an error has occurred.
531 for (const WatcherWithUserData &watcher : watchers) {
532 watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast<Status>(status));
536 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
537 return cb.LineFromPosition(pos);
540 Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
541 // Avoids casting in callers for this very common function
542 return cb.LineFromPosition(pos);
545 Sci::Position Document::LineStartPosition(Sci::Position position) const noexcept {
546 return cb.LineStart(cb.LineFromPosition(position));
549 Sci::Position Document::LineEndPosition(Sci::Position position) const noexcept {
550 return cb.LineEnd(cb.LineFromPosition(position));
553 bool Document::IsLineEndPosition(Sci::Position position) const noexcept {
554 return LineEndPosition(position) == position;
557 bool Document::IsPositionInLineEnd(Sci::Position position) const noexcept {
558 return position >= LineEndPosition(position);
561 Sci::Position Document::VCHomePosition(Sci::Position position) const {
562 const Sci::Line line = SciLineFromPosition(position);
563 const Sci::Position startPosition = LineStart(line);
564 const Sci::Position endLine = LineEnd(line);
565 Sci::Position startText = startPosition;
566 while (startText < endLine && IsSpaceOrTab(cb.CharAt(startText)))
567 startText++;
568 if (position == startText)
569 return startPosition;
570 else
571 return startText;
574 Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept {
575 return cb.IndexLineStart(line, lineCharacterIndex);
578 Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept {
579 return cb.LineFromPositionIndex(pos, lineCharacterIndex);
582 Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept {
583 const Sci::Position posAfter = cb.LineStart(line) + length;
584 if (posAfter >= LengthNoExcept()) {
585 return LinesTotal();
587 const Sci::Line lineAfter = SciLineFromPosition(posAfter);
588 if (lineAfter > line) {
589 return lineAfter;
590 } else {
591 // Want to make some progress so return next line
592 return lineAfter + 1;
596 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
597 const int prev = Levels()->SetLevel(line, level, LinesTotal());
598 if (prev != level) {
599 DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker,
600 LineStart(line), 0, 0, nullptr, line);
601 mh.foldLevelNow = static_cast<FoldLevel>(level);
602 mh.foldLevelPrev = static_cast<FoldLevel>(prev);
603 NotifyModified(mh);
605 return prev;
608 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
609 return Levels()->GetLevel(line);
612 FoldLevel Document::GetFoldLevel(Sci_Position line) const noexcept {
613 return Levels()->GetFoldLevel(line);
616 void Document::ClearLevels() {
617 Levels()->ClearLevels();
620 static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept {
621 if (LevelIsWhitespace(levelTry))
622 return true;
623 else
624 return LevelNumber(levelStart) < LevelNumber(levelTry);
627 Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional<FoldLevel> level, Sci::Line lastLine) {
628 const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent));
629 const Sci::Line maxLine = LinesTotal();
630 const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
631 Sci::Line lineMaxSubord = lineParent;
632 while (lineMaxSubord < maxLine - 1) {
633 EnsureStyledTo(LineStart(lineMaxSubord + 2));
634 if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1)))
635 break;
636 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord)))
637 break;
638 lineMaxSubord++;
640 if (lineMaxSubord > lineParent) {
641 if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) {
642 // Have chewed up some whitespace that belongs to a parent so seek back
643 if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) {
644 lineMaxSubord--;
648 return lineMaxSubord;
651 Sci::Line Document::GetFoldParent(Sci::Line line) const noexcept {
652 return Levels()->GetFoldParent(line);
// Compute the fold block surrounding `line` and store it in highlightDelimiter:
// the block's first and last lines plus the nearest lines before and after
// `line` at which the highlight changes. highlightDelimiter is cleared when no
// enclosing fold block is found. lastLine bounds how far GetLastChild looks.
655 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
656 const FoldLevel level = GetFoldLevel(line);
657 const Sci::Line lookLastLine = std::max(line, lastLine) + 1;
// Step backwards over whitespace lines and over headers whose level number is
// not below that of the following line.
659 Sci::Line lookLine = line;
660 FoldLevel lookLineLevel = level;
661 FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel);
662 while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) ||
663 (LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) {
664 lookLineLevel = GetFoldLevel(--lookLine);
665 lookLineLevelNum = LevelNumberPart(lookLineLevel);
// The block starts at the line reached when it is a header, else at its fold parent.
668 Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? lookLine : GetFoldParent(lookLine);
669 if (beginFoldBlock == -1) {
670 highlightDelimiter.Clear();
671 return;
674 Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine);
675 Sci::Line firstChangeableLineBefore = -1;
// The block found ends before `line`: search earlier headers for one whose
// last child is exactly `line`.
676 if (endFoldBlock < line) {
677 lookLine = beginFoldBlock - 1;
678 lookLineLevel = GetFoldLevel(lookLine);
679 lookLineLevelNum = LevelNumberPart(lookLineLevel);
680 while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) {
681 if (LevelIsHeader(lookLineLevel)) {
682 if (GetLastChild(lookLine, {}, lookLastLine) == line) {
683 beginFoldBlock = lookLine;
684 endFoldBlock = line;
685 firstChangeableLineBefore = line - 1;
688 if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum))
689 break;
690 lookLineLevel = GetFoldLevel(--lookLine);
691 lookLineLevelNum = LevelNumberPart(lookLineLevel);
// Nearest earlier line inside the block that is whitespace or has a greater level number than `line`.
694 if (firstChangeableLineBefore == -1) {
695 for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
696 lookLine >= beginFoldBlock;
697 lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
698 if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) {
699 firstChangeableLineBefore = lookLine;
700 break;
// Default: the line just before the block.
704 if (firstChangeableLineBefore == -1)
705 firstChangeableLineBefore = beginFoldBlock - 1;
// Nearest later line inside the block that is a header with a level number below that of its following line.
707 Sci::Line firstChangeableLineAfter = -1;
708 for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
709 lookLine <= endFoldBlock;
710 lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
711 if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) {
712 firstChangeableLineAfter = lookLine;
713 break;
// Default: the line just after the block.
716 if (firstChangeableLineAfter == -1)
717 firstChangeableLineAfter = endFoldBlock + 1;
719 highlightDelimiter.beginFoldBlock = beginFoldBlock;
720 highlightDelimiter.endFoldBlock = endFoldBlock;
721 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
722 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
725 Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept {
726 return std::clamp<Sci::Position>(pos, 0, LengthNoExcept());
729 bool Document::IsCrLf(Sci::Position pos) const noexcept {
730 if (pos < 0)
731 return false;
732 if (pos >= (LengthNoExcept() - 1))
733 return false;
734 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
737 int Document::LenChar(Sci::Position pos) const noexcept {
738 if (pos < 0 || pos >= LengthNoExcept()) {
739 // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
740 return 1;
741 } else if (IsCrLf(pos)) {
742 return 2;
745 const unsigned char leadByte = cb.UCharAt(pos);
746 if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
747 // Common case: ASCII character
748 return 1;
750 if (CpUtf8 == dbcsCodePage) {
751 const int widthCharBytes = UTF8BytesOfLead[leadByte];
752 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
753 for (int b = 1; b < widthCharBytes; b++) {
754 charBytes[b] = cb.UCharAt(pos + b);
756 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
757 if (utf8status & UTF8MaskInvalid) {
758 // Treat as invalid and use up just one byte
759 return 1;
760 } else {
761 return utf8status & UTF8MaskWidth;
763 } else {
764 if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) {
765 return 2;
766 } else {
767 return 1;
772 bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
773 Sci::Position trail = pos;
774 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
775 trail--;
776 start = (trail > 0) ? trail-1 : trail;
778 const unsigned char leadByte = cb.UCharAt(start);
779 const int widthCharBytes = UTF8BytesOfLead[leadByte];
780 if (widthCharBytes == 1) {
781 return false;
782 } else {
783 const int trailBytes = widthCharBytes - 1;
784 const Sci::Position len = pos - start;
785 if (len > trailBytes)
786 // pos too far from lead
787 return false;
788 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
789 for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
790 charBytes[b] = cb.CharAt(start+b);
791 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
792 if (utf8status & UTF8MaskInvalid)
793 return false;
794 end = start + widthCharBytes;
795 return true;
799 // Normalise a position so that it is not part way through a multi-byte character.
800 // This can occur in two situations -
801 // When lines are terminated with \r\n pairs which should be treated as one character.
802 // When displaying DBCS text such as Japanese.
803 // If moving, move the position in the indicated direction.
// pos: position to normalise; values outside the document are clamped to 0 or the length.
// moveDir: > 0 moves to the end of the enclosing character (or "\r\n" pair),
//          otherwise to its start.
// checkLineEnd: when true, a position between '\r' and '\n' is also moved.
// Returns the adjusted position, or pos unchanged when it is already valid.
804 Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept {
805 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
806 // If out of range, just return minimum/maximum value.
807 if (pos <= 0)
808 return 0;
809 if (pos >= LengthNoExcept())
810 return LengthNoExcept();
812 // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
// pos - 1 starting a "\r\n" pair means pos sits between the two bytes.
813 if (checkLineEnd && IsCrLf(pos - 1)) {
814 if (moveDir > 0)
815 return pos + 1;
816 else
817 return pos - 1;
// Multi-byte handling applies only when a code page is set.
820 if (dbcsCodePage) {
821 if (CpUtf8 == dbcsCodePage) {
822 const unsigned char ch = cb.UCharAt(pos);
823 // If ch is not a trail byte then pos is valid intercharacter position
824 if (UTF8IsTrailByte(ch)) {
825 Sci::Position startUTF = pos;
826 Sci::Position endUTF = pos;
827 if (InGoodUTF8(pos, startUTF, endUTF)) {
828 // ch is a trail byte within a UTF-8 character
829 if (moveDir > 0)
830 pos = endUTF;
831 else
832 pos = startUTF;
834 // Else invalid UTF-8 so return position of isolated trail byte
836 } else {
837 // Anchor DBCS calculations at start of line because start of line can
838 // not be a DBCS trail byte.
839 const Sci::Position posStartLine = LineStartPosition(pos);
840 if (pos == posStartLine)
841 return pos;
843 // Step back until a non-lead-byte is found.
844 Sci::Position posCheck = pos;
845 while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
846 posCheck--;
848 // Check from known start of character.
// Walk forward character by character until pos is reached or stepped over.
849 while (posCheck < pos) {
850 const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1;
851 if (posCheck + mbsize == pos) {
852 return pos;
853 } else if (posCheck + mbsize > pos) {
854 if (moveDir > 0) {
855 return posCheck + mbsize;
856 } else {
857 return posCheck;
860 posCheck += mbsize;
865 return pos;
868 // NextPosition moves between valid positions - it can not handle a position in the middle of a
869 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
870 // A \r\n pair is treated as two characters.
// pos: a valid character boundary to move from.
// moveDir: > 0 moves forward one character, anything else moves backward one character.
// Returns the new position, clamped to [0, cb.Length()].
871 Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
872 // If out of range, just return minimum/maximum value.
873 const int increment = (moveDir > 0) ? 1 : -1;
874 if (pos + increment <= 0)
875 return 0;
876 if (pos + increment >= cb.Length())
877 return cb.Length();
879 if (dbcsCodePage) {
880 if (CpUtf8 == dbcsCodePage) {
881 if (increment == 1) {
882 // Simple forward movement case so can avoid some checks
883 const unsigned char leadByte = cb.UCharAt(pos);
884 if (UTF8IsAscii(leadByte)) {
885 // Single byte character or invalid
886 pos++;
887 } else {
// Gather the bytes announced by the lead byte; invalid sequences advance one byte.
888 const int widthCharBytes = UTF8BytesOfLead[leadByte];
889 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
890 for (int b=1; b<widthCharBytes; b++)
891 charBytes[b] = cb.CharAt(pos+b);
892 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
893 if (utf8status & UTF8MaskInvalid)
894 pos++;
895 else
896 pos += utf8status & UTF8MaskWidth;
898 } else {
899 // Examine byte before position
900 pos--;
901 const unsigned char ch = cb.UCharAt(pos);
902 // If ch is not a trail byte then pos is valid intercharacter position
903 if (UTF8IsTrailByte(ch)) {
904 // If ch is a trail byte in a valid UTF-8 character then return start of character
905 Sci::Position startUTF = pos;
906 Sci::Position endUTF = pos;
907 if (InGoodUTF8(pos, startUTF, endUTF)) {
908 pos = startUTF;
910 // Else invalid UTF-8 so return position of isolated trail byte
913 } else {
// DBCS code page
914 if (moveDir > 0) {
915 const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1;
916 pos += mbsize;
917 if (pos > cb.Length())
918 pos = cb.Length();
919 } else {
920 // Anchor DBCS calculations at start of line because start of line can
921 // not be a DBCS trail byte.
922 const Sci::Position posStartLine = LineStartPosition(pos);
923 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
924 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
925 if ((pos - 1) <= posStartLine) {
926 return pos - 1;
927 } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
928 // Should actually be trail byte
929 if (IsDBCSDualByteAt(pos - 2)) {
930 return pos - 2;
931 } else {
932 // Invalid byte pair so treat as one byte wide
933 return pos - 1;
935 } else {
936 // Otherwise, step back until a non-lead-byte is found.
937 Sci::Position posTemp = pos - 1;
// NOTE(review): no loop body is visible after this `while` in this listing —
// presumably an empty statement; confirm against the original file.
938 while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
940 // Now posTemp+1 must point to the beginning of a character,
941 // so figure out whether we went back an even or an odd
942 // number of bytes and go back 1 or 2 bytes, respectively.
943 const Sci::Position widthLast = ((pos - posTemp) & 1) + 1;
944 if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) {
945 return pos - widthLast;
947 // Byte before pos may be valid character or may be an invalid second byte
948 return pos - 1;
952 } else {
// No code page set: every byte is one character.
953 pos += increment;
956 return pos;
959 bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
960 // Returns true if pos changed
961 Sci::Position posNext = NextPosition(pos, moveDir);
962 if (posNext == pos) {
963 return false;
964 } else {
965 pos = posNext;
966 return true;
970 CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {
971 if (position >= LengthNoExcept()) {
972 return CharacterExtracted(unicodeReplacementChar, 0);
974 const unsigned char leadByte = cb.UCharAt(position);
975 if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
976 // Common case: ASCII character
977 return CharacterExtracted(leadByte, 1);
979 if (CpUtf8 == dbcsCodePage) {
980 const int widthCharBytes = UTF8BytesOfLead[leadByte];
981 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
982 for (int b = 1; b<widthCharBytes; b++)
983 charBytes[b] = cb.UCharAt(position + b);
984 return CharacterExtracted(charBytes, widthCharBytes);
985 } else {
986 if (IsDBCSLeadByteNoExcept(leadByte)) {
987 const unsigned char trailByte = cb.UCharAt(position + 1);
988 if (IsDBCSTrailByteNoExcept(trailByte)) {
989 return CharacterExtracted::DBCS(leadByte, trailByte);
992 return CharacterExtracted(leadByte, 1);
996 CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {
997 if (position <= 0) {
998 return CharacterExtracted(unicodeReplacementChar, 0);
1000 const unsigned char previousByte = cb.UCharAt(position - 1);
1001 if (0 == dbcsCodePage) {
1002 return CharacterExtracted(previousByte, 1);
1004 if (CpUtf8 == dbcsCodePage) {
1005 if (UTF8IsAscii(previousByte)) {
1006 return CharacterExtracted(previousByte, 1);
1008 position--;
1009 // If previousByte is not a trail byte then its invalid
1010 if (UTF8IsTrailByte(previousByte)) {
1011 // If previousByte is a trail byte in a valid UTF-8 character then find start of character
1012 Sci::Position startUTF = position;
1013 Sci::Position endUTF = position;
1014 if (InGoodUTF8(position, startUTF, endUTF)) {
1015 const Sci::Position widthCharBytes = endUTF - startUTF;
1016 unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
1017 for (Sci::Position b = 0; b<widthCharBytes; b++)
1018 charBytes[b] = cb.UCharAt(startUTF + b);
1019 return CharacterExtracted(charBytes, widthCharBytes);
1021 // Else invalid UTF-8 so return position of isolated trail byte
1023 return CharacterExtracted(unicodeReplacementChar, 1);
1024 } else {
1025 // Moving backwards in DBCS is complex so use NextPosition
1026 const Sci::Position posStartCharacter = NextPosition(position, -1);
1027 return CharacterAfter(posStartCharacter);
1031 // Return -1 on out-of-bounds
1032 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
1033 Sci::Position pos = positionStart;
1034 if (dbcsCodePage) {
1035 const int increment = (characterOffset > 0) ? 1 : -1;
1036 while (characterOffset != 0) {
1037 const Sci::Position posNext = NextPosition(pos, increment);
1038 if (posNext == pos)
1039 return Sci::invalidPosition;
1040 pos = posNext;
1041 characterOffset -= increment;
1043 } else {
1044 pos = positionStart + characterOffset;
1045 if ((pos < 0) || (pos > Length()))
1046 return Sci::invalidPosition;
1048 return pos;
1051 Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept {
1052 Sci::Position pos = positionStart;
1053 if (dbcsCodePage) {
1054 const int increment = (characterOffset > 0) ? 1 : -1;
1055 while (characterOffset != 0) {
1056 const Sci::Position posNext = NextPosition(pos, increment);
1057 if (posNext == pos)
1058 return Sci::invalidPosition;
1059 if (std::abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
1060 characterOffset -= increment;
1061 pos = posNext;
1062 characterOffset -= increment;
1064 } else {
1065 pos = positionStart + characterOffset;
1066 if ((pos < 0) || (pos > LengthNoExcept()))
1067 return Sci::invalidPosition;
1069 return pos;
1072 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
1073 int bytesInCharacter = 1;
1074 const unsigned char leadByte = cb.UCharAt(position);
1075 int character = leadByte;
1076 if (dbcsCodePage && !UTF8IsAscii(leadByte)) {
1077 if (CpUtf8 == dbcsCodePage) {
1078 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1079 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
1080 for (int b=1; b<widthCharBytes; b++)
1081 charBytes[b] = cb.UCharAt(position+b);
1082 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1083 if (utf8status & UTF8MaskInvalid) {
1084 // Report as singleton surrogate values which are invalid Unicode
1085 character = 0xDC80 + leadByte;
1086 } else {
1087 bytesInCharacter = utf8status & UTF8MaskWidth;
1088 character = UnicodeFromUTF8(charBytes);
1090 } else {
1091 if (IsDBCSLeadByteNoExcept(leadByte)) {
1092 const unsigned char trailByte = cb.UCharAt(position + 1);
1093 if (IsDBCSTrailByteNoExcept(trailByte)) {
1094 bytesInCharacter = 2;
1095 character = (leadByte << 8) | trailByte;
1100 if (pWidth) {
1101 *pWidth = bytesInCharacter;
1103 return character;
1106 int SCI_METHOD Document::CodePage() const {
1107 return dbcsCodePage;
1110 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
1111 // Used by lexers so must match IDocument method exactly
1112 return IsDBCSLeadByteNoExcept(ch);
1115 bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
1116 // Used inside core Scintilla
1117 // Byte ranges found in Wikipedia articles with relevant search strings in each case
1118 const unsigned char uch = ch;
1119 switch (dbcsCodePage) {
1120 case 932:
1121 // Shift_jis
1122 return ((uch >= 0x81) && (uch <= 0x9F)) ||
1123 ((uch >= 0xE0) && (uch <= 0xFC));
1124 // Lead bytes F0 to FC may be a Microsoft addition.
1125 case 936:
1126 // GBK
1127 return (uch >= 0x81) && (uch <= 0xFE);
1128 case 949:
1129 // Korean Wansung KS C-5601-1987
1130 return (uch >= 0x81) && (uch <= 0xFE);
1131 case 950:
1132 // Big5
1133 return (uch >= 0x81) && (uch <= 0xFE);
1134 case 1361:
1135 // Korean Johab KS C-5601-1992
1136 return
1137 ((uch >= 0x84) && (uch <= 0xD3)) ||
1138 ((uch >= 0xD8) && (uch <= 0xDE)) ||
1139 ((uch >= 0xE0) && (uch <= 0xF9));
1141 return false;
1144 bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept {
1145 const unsigned char trail = ch;
1146 switch (dbcsCodePage) {
1147 case 932:
1148 // Shift_jis
1149 return (trail != 0x7F) &&
1150 ((trail >= 0x40) && (trail <= 0xFC));
1151 case 936:
1152 // GBK
1153 return (trail != 0x7F) &&
1154 ((trail >= 0x40) && (trail <= 0xFE));
1155 case 949:
1156 // Korean Wansung KS C-5601-1987
1157 return
1158 ((trail >= 0x41) && (trail <= 0x5A)) ||
1159 ((trail >= 0x61) && (trail <= 0x7A)) ||
1160 ((trail >= 0x81) && (trail <= 0xFE));
1161 case 950:
1162 // Big5
1163 return
1164 ((trail >= 0x40) && (trail <= 0x7E)) ||
1165 ((trail >= 0xA1) && (trail <= 0xFE));
1166 case 1361:
1167 // Korean Johab KS C-5601-1992
1168 return
1169 ((trail >= 0x31) && (trail <= 0x7E)) ||
1170 ((trail >= 0x81) && (trail <= 0xFE));
1172 return false;
1175 int Document::DBCSDrawBytes(std::string_view text) const noexcept {
1176 if (text.length() <= 1) {
1177 return static_cast<int>(text.length());
1179 if (IsDBCSLeadByteNoExcept(text[0])) {
1180 return IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1;
1181 } else {
1182 return 1;
1186 bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept {
1187 return IsDBCSLeadByteNoExcept(cb.CharAt(pos))
1188 && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1));
1191 // Need to break text into segments near end but taking into account the
1192 // encoding to not break inside a UTF-8 or DBCS character and also trying
1193 // to avoid breaking inside a pair of combining characters, or inside
1194 // ligatures.
1195 // TODO: implement grapheme cluster boundaries,
1196 // see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
1198 // The segment length must always be long enough (more than 4 bytes)
1199 // so that there will be at least one whole character to make a segment.
1200 // For UTF-8, text must consist only of valid whole characters.
1201 // In preference order from best to worst:
1202 // 1) Break before or after spaces or controls
1203 // 2) Break at word and punctuation boundary for better kerning and ligature support
1204 // 3) Break after whole character, this may break combining characters
1206 size_t Document::SafeSegment(std::string_view text) const noexcept {
1207 // check space first as most written language use spaces.
1208 for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) {
1209 if (IsBreakSpace(*it)) {
1210 return it - text.begin();
1214 if (!dbcsCodePage || dbcsCodePage == CpUtf8) {
1215 // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary.
1216 std::string_view::iterator it = text.end() - 1;
1217 const bool punctuation = IsPunctuation(*it);
1218 do {
1219 --it;
1220 if (punctuation != IsPunctuation(*it)) {
1221 return it - text.begin() + 1;
1223 } while (it != text.begin());
1225 it = text.end() - 1;
1226 if (dbcsCodePage) {
1227 // for UTF-8 go back to the start of last character.
1228 for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) {
1229 --it;
1232 return it - text.begin();
1236 // forward iterate for DBCS to find word and punctuation boundary.
1237 size_t lastPunctuationBreak = 0;
1238 size_t lastEncodingAllowedBreak = 0;
1239 CharacterClass ccPrev = CharacterClass::space;
1240 for (size_t j = 0; j < text.length();) {
1241 const unsigned char ch = text[j];
1242 lastEncodingAllowedBreak = j++;
1244 CharacterClass cc = CharacterClass::word;
1245 if (UTF8IsAscii(ch)) {
1246 if (IsPunctuation(ch)) {
1247 cc = CharacterClass::punctuation;
1249 } else {
1250 j += IsDBCSLeadByteNoExcept(ch);
1252 if (cc != ccPrev) {
1253 ccPrev = cc;
1254 lastPunctuationBreak = lastEncodingAllowedBreak;
1257 return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak;
1261 EncodingFamily Document::CodePageFamily() const noexcept {
1262 if (CpUtf8 == dbcsCodePage)
1263 return EncodingFamily::unicode;
1264 else if (dbcsCodePage)
1265 return EncodingFamily::dbcs;
1266 else
1267 return EncodingFamily::eightBit;
1270 void Document::ModifiedAt(Sci::Position pos) noexcept {
1271 if (endStyled > pos)
1272 endStyled = pos;
1275 void Document::CheckReadOnly() {
1276 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
1277 enteredReadOnlyCount++;
1278 NotifyModifyAttempt();
1279 enteredReadOnlyCount--;
1283 void Document::TrimReplacement(std::string_view &text, Range &range) const noexcept {
1284 while (!text.empty() && !range.Empty() && (text.front() == CharAt(range.start))) {
1285 text.remove_prefix(1);
1286 range.start++;
1288 while (!text.empty() && !range.Empty() && (text.back() == CharAt(range.end-1))) {
1289 text.remove_suffix(1);
1290 range.end--;
1294 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1295 // SetStyleAt does not change the persistent state of a document
1297 bool Document::DeleteChars(Sci::Position pos, Sci::Position len) {
1298 if (pos < 0)
1299 return false;
1300 if (len <= 0)
1301 return false;
1302 if ((pos + len) > LengthNoExcept())
1303 return false;
1304 CheckReadOnly();
1305 if (enteredModification != 0) {
1306 return false;
1307 } else {
1308 enteredModification++;
1309 if (!cb.IsReadOnly()) {
1310 NotifyModified(
1311 DocModification(
1312 ModificationFlags::BeforeDelete | ModificationFlags::User,
1313 pos, len,
1314 0, nullptr));
1315 const Sci::Line prevLinesTotal = LinesTotal();
1316 const bool startSavePoint = cb.IsSavePoint();
1317 bool startSequence = false;
1318 const char *text = cb.DeleteChars(pos, len, startSequence);
1319 if (startSavePoint && cb.IsCollectingUndo())
1320 NotifySavePoint(false);
1321 if ((pos < LengthNoExcept()) || (pos == 0))
1322 ModifiedAt(pos);
1323 else
1324 ModifiedAt(pos-1);
1325 NotifyModified(
1326 DocModification(
1327 ModificationFlags::DeleteText | ModificationFlags::User |
1328 (startSequence?ModificationFlags::StartAction:ModificationFlags::None),
1329 pos, len,
1330 LinesTotal() - prevLinesTotal, text));
1332 enteredModification--;
1334 return !cb.IsReadOnly();
1338 * Insert a string with a length.
1340 Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
1341 if (insertLength <= 0) {
1342 return 0;
1344 CheckReadOnly(); // Application may change read only state here
1345 if (cb.IsReadOnly()) {
1346 return 0;
1348 if (enteredModification != 0) {
1349 return 0;
1351 enteredModification++;
1352 insertionSet = false;
1353 insertion.clear();
1354 NotifyModified(
1355 DocModification(
1356 ModificationFlags::InsertCheck,
1357 position, insertLength,
1358 0, s));
1359 if (insertionSet) {
1360 s = insertion.c_str();
1361 insertLength = insertion.length();
1363 NotifyModified(
1364 DocModification(
1365 ModificationFlags::BeforeInsert | ModificationFlags::User,
1366 position, insertLength,
1367 0, s));
1368 const Sci::Line prevLinesTotal = LinesTotal();
1369 const bool startSavePoint = cb.IsSavePoint();
1370 bool startSequence = false;
1371 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1372 if (startSavePoint && cb.IsCollectingUndo())
1373 NotifySavePoint(false);
1374 ModifiedAt(position);
1375 NotifyModified(
1376 DocModification(
1377 ModificationFlags::InsertText | ModificationFlags::User |
1378 (startSequence?ModificationFlags::StartAction:ModificationFlags::None),
1379 position, insertLength,
1380 LinesTotal() - prevLinesTotal, text));
1381 if (insertionSet) { // Free memory as could be large
1382 std::string().swap(insertion);
1384 enteredModification--;
1385 return insertLength;
1388 Sci::Position Document::InsertString(Sci::Position position, std::string_view sv) {
1389 return InsertString(position, sv.data(), sv.length());
1392 void Document::ChangeInsertion(const char *s, Sci::Position length) {
1393 insertionSet = true;
1394 insertion.assign(s, length);
1397 int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1398 try {
1399 const Sci::Position position = Length();
1400 InsertString(position, data, length);
1401 } catch (std::bad_alloc &) {
1402 return static_cast<int>(Status::BadAlloc);
1403 } catch (...) {
1404 return static_cast<int>(Status::Failure);
1406 return static_cast<int>(Status::Ok);
1409 IDocumentEditable *Document::AsDocumentEditable() noexcept {
1410 return static_cast<IDocumentEditable *>(this);
1413 void *SCI_METHOD Document::ConvertToDocument() {
1414 return AsDocumentEditable();
1417 Sci::Position Document::Undo() {
1418 Sci::Position newPos = -1;
1419 CheckReadOnly();
1420 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1421 enteredModification++;
1422 if (!cb.IsReadOnly()) {
1423 const bool startSavePoint = cb.IsSavePoint();
1424 bool multiLine = false;
1425 const int steps = cb.StartUndo();
1426 //Platform::DebugPrintf("Steps=%d\n", steps);
1427 Range coalescedRemove; // Default is empty at 0
1428 for (int step = 0; step < steps; step++) {
1429 const Sci::Line prevLinesTotal = LinesTotal();
1430 const Action action = cb.GetUndoStep();
1431 if (action.at == ActionType::remove) {
1432 NotifyModified(DocModification(
1433 ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
1434 } else if (action.at == ActionType::container) {
1435 DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
1436 dm.token = action.position;
1437 NotifyModified(dm);
1438 } else {
1439 NotifyModified(DocModification(
1440 ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
1442 cb.PerformUndoStep();
1443 if (action.at != ActionType::container) {
1444 ModifiedAt(action.position);
1445 newPos = action.position;
1448 ModificationFlags modFlags = ModificationFlags::Undo;
1449 // With undo, an insertion action becomes a deletion notification
1450 if (action.at == ActionType::remove) {
1451 newPos += action.lenData;
1452 modFlags |= ModificationFlags::InsertText;
1453 if (coalescedRemove.Contains(action.position)) {
1454 coalescedRemove.end += action.lenData;
1455 newPos = coalescedRemove.end;
1456 } else {
1457 coalescedRemove = Range(action.position, action.position + action.lenData);
1459 } else if (action.at == ActionType::insert) {
1460 modFlags |= ModificationFlags::DeleteText;
1461 coalescedRemove = Range();
1463 if (steps > 1)
1464 modFlags |= ModificationFlags::MultiStepUndoRedo;
1465 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1466 if (linesAdded != 0)
1467 multiLine = true;
1468 if (step == steps - 1) {
1469 modFlags |= ModificationFlags::LastStepInUndoRedo;
1470 if (multiLine)
1471 modFlags |= ModificationFlags::MultilineUndoRedo;
1473 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1474 linesAdded, action.data));
1477 const bool endSavePoint = cb.IsSavePoint();
1478 if (startSavePoint != endSavePoint)
1479 NotifySavePoint(endSavePoint);
1481 enteredModification--;
1483 return newPos;
1486 Sci::Position Document::Redo() {
1487 Sci::Position newPos = -1;
1488 CheckReadOnly();
1489 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1490 enteredModification++;
1491 if (!cb.IsReadOnly()) {
1492 const bool startSavePoint = cb.IsSavePoint();
1493 bool multiLine = false;
1494 const int steps = cb.StartRedo();
1495 for (int step = 0; step < steps; step++) {
1496 const Sci::Line prevLinesTotal = LinesTotal();
1497 const Action action = cb.GetRedoStep();
1498 if (action.at == ActionType::insert) {
1499 NotifyModified(DocModification(
1500 ModificationFlags::BeforeInsert | ModificationFlags::Redo, action));
1501 } else if (action.at == ActionType::container) {
1502 DocModification dm(ModificationFlags::Container | ModificationFlags::Redo);
1503 dm.token = action.position;
1504 NotifyModified(dm);
1505 } else {
1506 NotifyModified(DocModification(
1507 ModificationFlags::BeforeDelete | ModificationFlags::Redo, action));
1509 cb.PerformRedoStep();
1510 if (action.at != ActionType::container) {
1511 ModifiedAt(action.position);
1512 newPos = action.position;
1515 ModificationFlags modFlags = ModificationFlags::Redo;
1516 if (action.at == ActionType::insert) {
1517 newPos += action.lenData;
1518 modFlags |= ModificationFlags::InsertText;
1519 } else if (action.at == ActionType::remove) {
1520 modFlags |= ModificationFlags::DeleteText;
1522 if (steps > 1)
1523 modFlags |= ModificationFlags::MultiStepUndoRedo;
1524 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1525 if (linesAdded != 0)
1526 multiLine = true;
1527 if (step == steps - 1) {
1528 modFlags |= ModificationFlags::LastStepInUndoRedo;
1529 if (multiLine)
1530 modFlags |= ModificationFlags::MultilineUndoRedo;
1532 NotifyModified(
1533 DocModification(modFlags, action.position, action.lenData,
1534 linesAdded, action.data));
1537 const bool endSavePoint = cb.IsSavePoint();
1538 if (startSavePoint != endSavePoint)
1539 NotifySavePoint(endSavePoint);
1541 enteredModification--;
1543 return newPos;
1546 int Document::UndoSequenceDepth() const noexcept {
1547 return cb.UndoSequenceDepth();
1550 void Document::DelChar(Sci::Position pos) {
1551 DeleteChars(pos, LenChar(pos));
1554 void Document::DelCharBack(Sci::Position pos) {
1555 if (pos <= 0) {
1556 return;
1557 } else if (IsCrLf(pos - 2)) {
1558 DeleteChars(pos - 2, 2);
1559 } else if (dbcsCodePage) {
1560 const Sci::Position startChar = NextPosition(pos, -1);
1561 DeleteChars(startChar, pos - startChar);
1562 } else {
1563 DeleteChars(pos - 1, 1);
1567 static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1568 return ((pos / tabSize) + 1) * tabSize;
1571 static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1572 std::string indentation;
1573 if (!insertSpaces) {
1574 while (indent >= tabSize) {
1575 indentation += '\t';
1576 indent -= tabSize;
1579 while (indent > 0) {
1580 indentation += ' ';
1581 indent--;
1583 return indentation;
1586 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1587 int indent = 0;
1588 if ((line >= 0) && (line < LinesTotal())) {
1589 const Sci::Position lineStart = LineStart(line);
1590 const Sci::Position length = Length();
1591 for (Sci::Position i = lineStart; i < length; i++) {
1592 const char ch = cb.CharAt(i);
1593 if (ch == ' ')
1594 indent++;
1595 else if (ch == '\t')
1596 indent = static_cast<int>(NextTab(indent, tabInChars));
1597 else
1598 return indent;
1601 return indent;
1604 Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) {
1605 const int indentOfLine = GetLineIndentation(line);
1606 if (indent < 0)
1607 indent = 0;
1608 if (indent != indentOfLine) {
1609 const std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1610 const Sci::Position thisLineStart = LineStart(line);
1611 const Sci::Position indentPos = GetLineIndentPosition(line);
1612 UndoGroup ug(this);
1613 DeleteChars(thisLineStart, indentPos - thisLineStart);
1614 return thisLineStart + InsertString(thisLineStart, linebuf);
1615 } else {
1616 return GetLineIndentPosition(line);
1620 Sci::Position Document::GetLineIndentPosition(Sci::Line line) const {
1621 if (line < 0)
1622 return 0;
1623 Sci::Position pos = LineStart(line);
1624 const Sci::Position length = Length();
1625 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1626 pos++;
1628 return pos;
1631 Sci::Position Document::GetColumn(Sci::Position pos) const {
1632 Sci::Position column = 0;
1633 const Sci::Line line = SciLineFromPosition(pos);
1634 if ((line >= 0) && (line < LinesTotal())) {
1635 for (Sci::Position i = LineStart(line); i < pos;) {
1636 const char ch = cb.CharAt(i);
1637 if (ch == '\t') {
1638 column = NextTab(column, tabInChars);
1639 i++;
1640 } else if (ch == '\r') {
1641 return column;
1642 } else if (ch == '\n') {
1643 return column;
1644 } else if (i >= Length()) {
1645 return column;
1646 } else if (UTF8IsAscii(ch)) {
1647 column++;
1648 i++;
1649 } else {
1650 column++;
1651 i = NextPosition(i, 1);
1655 return column;
1658 Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept {
1659 startPos = MovePositionOutsideChar(startPos, 1, false);
1660 endPos = MovePositionOutsideChar(endPos, -1, false);
1661 Sci::Position count = 0;
1662 Sci::Position i = startPos;
1663 while (i < endPos) {
1664 count++;
1665 i = NextPosition(i, 1);
1667 return count;
1670 Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept {
1671 startPos = MovePositionOutsideChar(startPos, 1, false);
1672 endPos = MovePositionOutsideChar(endPos, -1, false);
1673 Sci::Position count = 0;
1674 Sci::Position i = startPos;
1675 while (i < endPos) {
1676 count++;
1677 const Sci::Position next = NextPosition(i, 1);
1678 if ((next - i) > 3)
1679 count++;
1680 i = next;
1682 return count;
1685 Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) {
1686 Sci::Position position = LineStart(line);
1687 if ((line >= 0) && (line < LinesTotal())) {
1688 Sci::Position columnCurrent = 0;
1689 while ((columnCurrent < column) && (position < Length())) {
1690 const char ch = cb.CharAt(position);
1691 if (ch == '\t') {
1692 columnCurrent = NextTab(columnCurrent, tabInChars);
1693 if (columnCurrent > column)
1694 return position;
1695 position++;
1696 } else if (ch == '\r') {
1697 return position;
1698 } else if (ch == '\n') {
1699 return position;
1700 } else {
1701 columnCurrent++;
1702 position = NextPosition(position, 1);
1706 return position;
1709 void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1710 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1711 for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1712 const Sci::Position indentOfLine = GetLineIndentation(line);
1713 if (forwards) {
1714 if (LineStart(line) < LineEnd(line)) {
1715 SetLineIndentation(line, indentOfLine + IndentSize());
1717 } else {
1718 SetLineIndentation(line, indentOfLine - IndentSize());
1723 namespace {
1725 constexpr std::string_view EOLForMode(EndOfLine eolMode) noexcept {
1726 switch (eolMode) {
1727 case EndOfLine::CrLf:
1728 return "\r\n";
1729 case EndOfLine::Cr:
1730 return "\r";
1731 default:
1732 return "\n";
1738 // Convert line endings for a piece of text to a particular mode.
1739 // Stop at len or when a NUL is found.
1740 std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) {
1741 std::string dest;
1742 const std::string_view eol = EOLForMode(eolModeWanted);
1743 for (size_t i = 0; (i < len) && (s[i]); i++) {
1744 if (s[i] == '\n' || s[i] == '\r') {
1745 dest.append(eol);
1746 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1747 i++;
1749 } else {
1750 dest.push_back(s[i]);
1753 return dest;
1756 void Document::ConvertLineEnds(EndOfLine eolModeSet) {
1757 UndoGroup ug(this);
1759 for (Sci::Position pos = 0; pos < Length(); pos++) {
1760 const char ch = cb.CharAt(pos);
1761 if (ch == '\r') {
1762 if (cb.CharAt(pos + 1) == '\n') {
1763 // CRLF
1764 if (eolModeSet == EndOfLine::Cr) {
1765 DeleteChars(pos + 1, 1); // Delete the LF
1766 } else if (eolModeSet == EndOfLine::Lf) {
1767 DeleteChars(pos, 1); // Delete the CR
1768 } else {
1769 pos++;
1771 } else {
1772 // CR
1773 if (eolModeSet == EndOfLine::CrLf) {
1774 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1775 } else if (eolModeSet == EndOfLine::Lf) {
1776 pos += InsertString(pos, "\n", 1); // Insert LF
1777 DeleteChars(pos, 1); // Delete CR
1778 pos--;
1781 } else if (ch == '\n') {
1782 // LF
1783 if (eolModeSet == EndOfLine::CrLf) {
1784 pos += InsertString(pos, "\r", 1); // Insert CR
1785 } else if (eolModeSet == EndOfLine::Cr) {
1786 pos += InsertString(pos, "\r", 1); // Insert CR
1787 DeleteChars(pos, 1); // Delete LF
1788 pos--;
1795 std::string_view Document::EOLString() const noexcept {
1796 return EOLForMode(eolMode);
1799 DocumentOption Document::Options() const noexcept {
1800 return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) |
1801 (cb.HasStyles() ? DocumentOption::Default : DocumentOption::StylesNone);
1804 bool Document::IsWhiteLine(Sci::Line line) const {
1805 Sci::Position currentChar = LineStart(line);
1806 const Sci::Position endLine = LineEnd(line);
1807 while (currentChar < endLine) {
1808 if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1809 return false;
1811 ++currentChar;
1813 return true;
1816 Sci::Position Document::ParaUp(Sci::Position pos) const {
1817 Sci::Line line = SciLineFromPosition(pos);
1818 const Sci::Position start = LineStart(line);
1819 if (pos == start) {
1820 line--;
1822 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1823 line--;
1825 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1826 line--;
1828 line++;
1829 return LineStart(line);
1832 Sci::Position Document::ParaDown(Sci::Position pos) const {
1833 Sci::Line line = SciLineFromPosition(pos);
1834 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1835 line++;
1837 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1838 line++;
1840 if (line < LinesTotal())
1841 return LineStart(line);
1842 else // end of a document
1843 return LineEnd(line-1);
1846 CharacterClass Document::WordCharacterClass(unsigned int ch) const {
1847 if (dbcsCodePage && (ch >= 0x80)) {
1848 if (CpUtf8 == dbcsCodePage) {
1849 // Use hard coded Unicode class
1850 const CharacterCategory cc = charMap.CategoryFor(ch);
1851 switch (cc) {
1853 // Separator, Line/Paragraph
1854 case ccZl:
1855 case ccZp:
1856 return CharacterClass::newLine;
1858 // Separator, Space
1859 case ccZs:
1860 // Other
1861 case ccCc:
1862 case ccCf:
1863 case ccCs:
1864 case ccCo:
1865 case ccCn:
1866 return CharacterClass::space;
1868 // Letter
1869 case ccLu:
1870 case ccLl:
1871 case ccLt:
1872 case ccLm:
1873 case ccLo:
1874 // Number
1875 case ccNd:
1876 case ccNl:
1877 case ccNo:
1878 // Mark - includes combining diacritics
1879 case ccMn:
1880 case ccMc:
1881 case ccMe:
1882 return CharacterClass::word;
1884 // Punctuation
1885 case ccPc:
1886 case ccPd:
1887 case ccPs:
1888 case ccPe:
1889 case ccPi:
1890 case ccPf:
1891 case ccPo:
1892 // Symbol
1893 case ccSm:
1894 case ccSc:
1895 case ccSk:
1896 case ccSo:
1897 return CharacterClass::punctuation;
1900 } else {
1901 // Asian DBCS
1902 return CharacterClass::word;
1905 return charClass.GetClass(static_cast<unsigned char>(ch));
1909 * Used by commands that want to select whole words.
1910 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1912 Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1913 CharacterClass ccStart = CharacterClass::word;
1914 if (delta < 0) {
1915 if (!onlyWordCharacters) {
1916 const CharacterExtracted ce = CharacterBefore(pos);
1917 ccStart = WordCharacterClass(ce.character);
1919 while (pos > 0) {
1920 const CharacterExtracted ce = CharacterBefore(pos);
1921 if (WordCharacterClass(ce.character) != ccStart)
1922 break;
1923 pos -= ce.widthBytes;
1925 } else {
1926 if (!onlyWordCharacters && pos < LengthNoExcept()) {
1927 const CharacterExtracted ce = CharacterAfter(pos);
1928 ccStart = WordCharacterClass(ce.character);
1930 while (pos < LengthNoExcept()) {
1931 const CharacterExtracted ce = CharacterAfter(pos);
1932 if (WordCharacterClass(ce.character) != ccStart)
1933 break;
1934 pos += ce.widthBytes;
1937 return MovePositionOutsideChar(pos, delta, true);
1941 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1942 * (delta < 0).
1943 * This is looking for a transition between character classes although there is also some
1944 * additional movement to transit white space.
1945 * Used by cursor movement by word commands.
1947 Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
1948 if (delta < 0) {
1949 while (pos > 0) {
1950 const CharacterExtracted ce = CharacterBefore(pos);
1951 if (WordCharacterClass(ce.character) != CharacterClass::space)
1952 break;
1953 pos -= ce.widthBytes;
1955 if (pos > 0) {
1956 CharacterExtracted ce = CharacterBefore(pos);
1957 const CharacterClass ccStart = WordCharacterClass(ce.character);
1958 while (pos > 0) {
1959 ce = CharacterBefore(pos);
1960 if (WordCharacterClass(ce.character) != ccStart)
1961 break;
1962 pos -= ce.widthBytes;
1965 } else {
1966 CharacterExtracted ce = CharacterAfter(pos);
1967 const CharacterClass ccStart = WordCharacterClass(ce.character);
1968 while (pos < LengthNoExcept()) {
1969 ce = CharacterAfter(pos);
1970 if (WordCharacterClass(ce.character) != ccStart)
1971 break;
1972 pos += ce.widthBytes;
1974 while (pos < LengthNoExcept()) {
1975 ce = CharacterAfter(pos);
1976 if (WordCharacterClass(ce.character) != CharacterClass::space)
1977 break;
1978 pos += ce.widthBytes;
1981 return pos;
1985 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1986 * (delta < 0).
1987 * This is looking for a transition between character classes although there is also some
1988 * additional movement to transit white space.
1989 * Used by cursor movement by word commands.
1991 Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const {
1992 if (delta < 0) {
1993 if (pos > 0) {
1994 CharacterExtracted ce = CharacterBefore(pos);
1995 const CharacterClass ccStart = WordCharacterClass(ce.character);
1996 if (ccStart != CharacterClass::space) {
1997 while (pos > 0) {
1998 ce = CharacterBefore(pos);
1999 if (WordCharacterClass(ce.character) != ccStart)
2000 break;
2001 pos -= ce.widthBytes;
2004 while (pos > 0) {
2005 ce = CharacterBefore(pos);
2006 if (WordCharacterClass(ce.character) != CharacterClass::space)
2007 break;
2008 pos -= ce.widthBytes;
2011 } else {
2012 while (pos < LengthNoExcept()) {
2013 const CharacterExtracted ce = CharacterAfter(pos);
2014 if (WordCharacterClass(ce.character) != CharacterClass::space)
2015 break;
2016 pos += ce.widthBytes;
2018 if (pos < LengthNoExcept()) {
2019 CharacterExtracted ce = CharacterAfter(pos);
2020 const CharacterClass ccStart = WordCharacterClass(ce.character);
2021 while (pos < LengthNoExcept()) {
2022 ce = CharacterAfter(pos);
2023 if (WordCharacterClass(ce.character) != ccStart)
2024 break;
2025 pos += ce.widthBytes;
2029 return pos;
2032 namespace {
2034 constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept {
2035 return (cc != ccNext) &&
2036 (cc == CharacterClass::word || cc == CharacterClass::punctuation);
2042 * Check that the character at the given position is a word or punctuation character and that
2043 * the previous character is of a different character class.
2045 bool Document::IsWordStartAt(Sci::Position pos) const {
2046 if (pos >= LengthNoExcept())
2047 return false;
2048 if (pos >= 0) {
2049 const CharacterExtracted cePos = CharacterAfter(pos);
2050 // At start of document, treat as if space before so can be word start
2051 const CharacterExtracted cePrev = (pos > 0) ?
2052 CharacterBefore(pos) : CharacterExtracted(' ', 1);
2053 return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character));
2055 return true;
2059 * Check that the character before the given position is a word or punctuation character and that
2060 * the next character is of a different character class.
2062 bool Document::IsWordEndAt(Sci::Position pos) const {
2063 if (pos <= 0)
2064 return false;
2065 if (pos <= LengthNoExcept()) {
2066 // At end of document, treat as if space after so can be word end
2067 const CharacterExtracted cePos = (pos < LengthNoExcept()) ?
2068 CharacterAfter(pos) : CharacterExtracted(' ', 1);
2069 const CharacterExtracted cePrev = CharacterBefore(pos);
2070 return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character));
2072 return true;
2076 * Check that the given range is has transitions between character classes at both
2077 * ends and where the characters on the inside are word or punctuation characters.
2079 bool Document::IsWordAt(Sci::Position start, Sci::Position end) const {
2080 return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
2083 bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
2084 return (!word && !wordStart) ||
2085 (word && IsWordAt(pos, pos + length)) ||
2086 (wordStart && IsWordStartAt(pos));
2089 bool Document::HasCaseFolder() const noexcept {
2090 return pcf != nullptr;
2093 void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {
2094 pcf = std::move(pcf_);
2097 CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
2098 const unsigned char leadByte = cb.UCharAt(position);
2099 if (UTF8IsAscii(leadByte)) {
2100 // Common case: ASCII character
2101 return CharacterExtracted(leadByte, 1);
2103 const int widthCharBytes = UTF8BytesOfLead[leadByte];
2104 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
2105 for (int b=1; b<widthCharBytes; b++)
2106 charBytes[b] = cb.UCharAt(position + b);
2107 return CharacterExtracted(charBytes, widthCharBytes);
2110 namespace {
2112 // Equivalent of memchr over the split view
2113 ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept {
2114 size_t range1Length = 0;
2115 if (start < view.length1) {
2116 range1Length = std::min(length, view.length1 - start);
2117 const char *match = static_cast<const char *>(memchr(view.segment1 + start, ch, range1Length));
2118 if (match) {
2119 return match - view.segment1;
2121 start += range1Length;
2123 const char *match2 = static_cast<const char *>(memchr(view.segment2 + start, ch, length - range1Length));
2124 if (match2) {
2125 return match2 - view.segment2;
2127 return -1;
2130 // Equivalent of memcmp over the split view
2131 // This does not call memcmp as search texts are commonly too short to overcome the
2132 // call overhead.
2133 bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept {
2134 for (size_t i = 0; i < text.length(); i++) {
2135 if (view.CharAt(i + start) != text[i]) {
2136 return false;
2139 return true;
2145 * Find text in document, supporting both forward and backward
2146 * searches (just pass minPos > maxPos to do a backward search)
2147 * Has not been tested with backwards DBCS searches yet.
// Search the document for the bytes in `search`, whose length is passed in through `*length`.
// The search runs forward when minPos <= maxPos and backward otherwise.
// Returns the position of the match, -1 when nothing matches, or minPos when `*length`
// is not positive. The case-insensitive UTF-8 and DBCS branches write the byte length of
// the matched document text back to `*length`.
2149 Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
2150 FindOption flags, Sci::Position *length) {
2151 if (*length <= 0)
2152 return minPos;
2153 const bool caseSensitive = FlagSet(flags, FindOption::MatchCase);
2154 const bool word = FlagSet(flags, FindOption::WholeWord);
2155 const bool wordStart = FlagSet(flags, FindOption::WordStart);
2156 const bool regExp = FlagSet(flags, FindOption::RegExp);
2157 if (regExp) {
// Regular expression searches are delegated to the regex object, which is created on first use.
2158 if (!regex)
2159 regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
2160 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
2161 } else {
2163 const bool forward = minPos <= maxPos;
2164 const int increment = forward ? 1 : -1;
2166 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2167 const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
2168 const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);
2170 // Compute actual search ranges needed
2171 const Sci::Position lengthFind = *length;
2173 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
// limitPos is the higher of the two endpoints; candidate matches may not extend past it.
2174 const Sci::Position limitPos = std::max(startPos, endPos);
2175 Sci::Position pos = startPos;
2176 if (!forward) {
2177 // Back all of a character
2178 pos = NextPosition(pos, increment);
2180 const SplitView cbView = cb.AllView();
2181 if (caseSensitive) {
// Forward searches stop at the last start position where the whole search text fits before endPos.
2182 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2183 const unsigned char charStartSearch = search[0];
2184 if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) {
2185 // This is a fast case where there is no need to test byte values to iterate
2186 // so becomes the equivalent of a memchr+memcmp loop.
2187 // UTF-8 search will not be self-synchronizing when starts with trail byte
2188 const std::string_view suffix(search + 1, lengthFind - 1);
2189 while (pos < endSearch) {
// Jump to the next occurrence of the first search byte, then compare the rest of the text.
2190 pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch);
2191 if (pos < 0) {
2192 break;
2194 if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2195 return pos;
2197 pos++;
2199 } else {
// General case-sensitive path: compare byte by byte at each candidate position,
// moving by whole characters when a DBCS code page is active.
2200 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2201 const unsigned char leadByte = cbView.CharAt(pos);
2202 if (leadByte == charStartSearch) {
2203 bool found = (pos + lengthFind) <= limitPos;
2204 // SplitMatch could be called here but it is slower with g++ -O2
2205 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
2206 found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
2208 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2209 return pos;
2212 if (forward && UTF8IsAscii(leadByte)) {
2213 pos++;
2214 } else {
2215 if (dbcsCodePage) {
2216 if (!NextCharacter(pos, increment)) {
2217 break;
2219 } else {
2220 pos += increment;
2225 } else if (CpUtf8 == dbcsCodePage) {
// Case-insensitive UTF-8: fold the search text once, then fold each document character
// and compare the folded bytes against the folded search text.
2226 constexpr size_t maxFoldingExpansion = 4;
2227 std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
2228 const size_t lenSearch =
2229 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2230 while (forward ? (pos < endPos) : (pos >= endPos)) {
2231 int widthFirstCharacter = 1;
2232 Sci::Position posIndexDocument = pos;
2233 size_t indexSearch = 0;
2234 bool characterMatches = true;
2235 while (indexSearch < lenSearch) {
2236 const unsigned char leadByte = cbView.CharAt(posIndexDocument);
2237 int widthChar = 1;
2238 size_t lenFlat = 1;
2239 if (UTF8IsAscii(leadByte)) {
// ASCII bytes are lower-cased directly instead of going through the case folder.
2240 if ((posIndexDocument + 1) > limitPos) {
2241 break;
2243 characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
2244 } else {
2245 char bytes[UTF8MaxBytes]{ static_cast<char>(leadByte) };
2246 const int widthCharBytes = UTF8BytesOfLead[leadByte];
2247 for (int b = 1; b < widthCharBytes; b++) {
2248 bytes[b] = cbView.CharAt(posIndexDocument + b);
2250 widthChar = UTF8Classify(bytes, widthCharBytes) & UTF8MaskWidth;
2251 if (!indexSearch) { // First character
2252 widthFirstCharacter = widthChar;
2254 if ((posIndexDocument + widthChar) > limitPos) {
2255 break;
2257 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
2258 lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2259 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2260 assert((indexSearch + lenFlat) <= searchThing.size());
2261 // Does folded match the buffer
2262 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2264 if (!characterMatches) {
2265 break;
2267 posIndexDocument += widthChar;
2268 indexSearch += lenFlat;
2270 if (characterMatches && (indexSearch == lenSearch)) {
2271 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
// The matched document text may differ in byte length from the search text, so report it.
2272 *length = posIndexDocument - pos;
2273 return pos;
2276 if (forward) {
2277 pos += widthFirstCharacter;
2278 } else {
2279 if (!NextCharacter(pos, increment)) {
2280 break;
2284 } else if (dbcsCodePage) {
// Case-insensitive DBCS: same approach as the UTF-8 branch with characters of one or two bytes.
2285 constexpr size_t maxBytesCharacter = 2;
2286 constexpr size_t maxFoldingExpansion = 4;
2287 std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
2288 const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2289 while (forward ? (pos < endPos) : (pos >= endPos)) {
2290 int widthFirstCharacter = 0;
2291 Sci::Position indexDocument = 0;
2292 size_t indexSearch = 0;
2293 bool characterMatches = true;
2294 while (((pos + indexDocument) < limitPos) &&
2295 (indexSearch < lenSearch)) {
2296 const unsigned char leadByte = cbView.CharAt(pos + indexDocument);
2297 const int widthChar = (!UTF8IsAscii(leadByte) && IsDBCSLeadByteNoExcept(leadByte)) ? 2 : 1;
2298 if (!widthFirstCharacter) {
2299 widthFirstCharacter = widthChar;
2301 if ((pos + indexDocument + widthChar) > limitPos) {
2302 break;
2304 size_t lenFlat = 1;
2305 if (widthChar == 1) {
2306 characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
2307 } else {
2308 const char bytes[maxBytesCharacter + 1] {
2309 static_cast<char>(leadByte),
2310 cbView.CharAt(pos + indexDocument + 1)
2312 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
2313 lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2314 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2315 assert((indexSearch + lenFlat) <= searchThing.size());
2316 // Does folded match the buffer
2317 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2319 if (!characterMatches) {
2320 break;
2322 indexDocument += widthChar;
2323 indexSearch += lenFlat;
2325 if (characterMatches && (indexSearch == lenSearch)) {
2326 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
2327 *length = indexDocument;
2328 return pos;
2331 if (forward) {
2332 pos += widthFirstCharacter;
2333 } else {
2334 if (!NextCharacter(pos, increment)) {
2335 break;
2339 } else {
// Case-insensitive search with no DBCS code page: the search text is folded into a buffer
// of lengthFind + 1 bytes and each document byte is compared against one folded byte.
2340 const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2341 std::vector<char> searchThing(lengthFind + 1);
2342 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2343 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2344 bool found = (pos + lengthFind) <= limitPos;
2345 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
2346 const char ch = cbView.CharAt(pos + indexSearch);
2347 const char chTest = searchThing[indexSearch];
2348 if (UTF8IsAscii(ch)) {
2349 found = chTest == MakeLowerCase(ch);
2350 } else {
2351 char folded[2];
2352 pcf->Fold(folded, sizeof(folded), &ch, 1);
2353 found = folded[0] == chTest;
2356 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2357 return pos;
2359 pos += increment;
2363 //Platform::DebugPrintf("Not found\n");
2364 return -1;
2367 const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) {
2368 if (regex)
2369 return regex->SubstituteByPosition(this, text, length);
2370 else
2371 return nullptr;
2374 LineCharacterIndexType Document::LineCharacterIndex() const noexcept {
2375 return cb.LineCharacterIndex();
2378 void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2379 return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2382 void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2383 return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2386 Sci::Line Document::LinesTotal() const noexcept {
2387 return cb.Lines();
2390 void Document::AllocateLines(Sci::Line lines) {
2391 cb.AllocateLines(lines);
2394 void Document::SetDefaultCharClasses(bool includeWordClass) {
2395 charClass.SetDefaultCharClasses(includeWordClass);
2398 void Document::SetCharClasses(const unsigned char *chars, CharacterClass newCharClass) {
2399 charClass.SetCharClasses(chars, newCharClass);
2402 int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char *buffer) const {
2403 return charClass.GetCharsOfClass(characterClass, buffer);
2406 void Document::SetCharacterCategoryOptimization(int countCharacters) {
2407 charMap.Optimize(countCharacters);
2410 int Document::CharacterCategoryOptimization() const noexcept {
2411 return charMap.Size();
2414 void SCI_METHOD Document::StartStyling(Sci_Position position) {
2415 endStyled = position;
2418 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
2419 if (enteredStyling != 0) {
2420 return false;
2421 } else {
2422 enteredStyling++;
2423 const Sci::Position prevEndStyled = endStyled;
2424 if (cb.SetStyleFor(endStyled, length, style)) {
2425 const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User,
2426 prevEndStyled, length);
2427 NotifyModified(mh);
2429 endStyled += length;
2430 enteredStyling--;
2431 return true;
2435 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2436 if (enteredStyling != 0) {
2437 return false;
2438 } else {
2439 enteredStyling++;
2440 bool didChange = false;
2441 Sci::Position startMod = 0;
2442 Sci::Position endMod = 0;
2443 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
2444 PLATFORM_ASSERT(endStyled < Length());
2445 if (cb.SetStyleAt(endStyled, styles[iPos])) {
2446 if (!didChange) {
2447 startMod = endStyled;
2449 didChange = true;
2450 endMod = endStyled;
2453 if (didChange) {
2454 const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User,
2455 startMod, endMod - startMod + 1);
2456 NotifyModified(mh);
2458 enteredStyling--;
2459 return true;
2463 void Document::EnsureStyledTo(Sci::Position pos) {
2464 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
2465 IncrementStyleClock();
2466 if (pli && !pli->UseContainerLexing()) {
2467 const Sci::Position endStyledTo = LineStartPosition(GetEndStyled());
2468 pli->Colourise(endStyledTo, pos);
2469 } else {
2470 // Ask the watchers to style, and stop as soon as one responds.
2471 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2472 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2473 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
2479 void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
2480 const Sci::Position stylingStart = GetEndStyled();
2481 ElapsedPeriod epStyling;
2482 EnsureStyledTo(pos);
2483 durationStyleOneByte.AddSample(pos - stylingStart, epStyling.Duration());
2486 LexInterface *Document::GetLexInterface() const noexcept {
2487 return pli.get();
2490 void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
2491 pli = std::move(pLexInterface);
2494 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
2495 const int statePrevious = States()->SetLineState(line, state, LinesTotal());
2496 if (state != statePrevious) {
2497 const DocModification mh(ModificationFlags::ChangeLineState, LineStart(line), 0, 0, nullptr,
2498 static_cast<Sci::Line>(line));
2499 NotifyModified(mh);
2501 return statePrevious;
2504 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
2505 return States()->GetLineState(line);
2508 Sci::Line Document::GetMaxLineState() const noexcept {
2509 return States()->GetMaxLineState();
2512 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
2513 const DocModification mh(ModificationFlags::LexerState, start,
2514 end-start, 0, nullptr, 0);
2515 NotifyModified(mh);
2518 StyledText Document::MarginStyledText(Sci::Line line) const noexcept {
2519 const LineAnnotation *pla = Margins();
2520 return StyledText(pla->Length(line), pla->Text(line),
2521 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2524 void Document::MarginSetText(Sci::Line line, const char *text) {
2525 Margins()->SetText(line, text);
2526 const DocModification mh(ModificationFlags::ChangeMargin, LineStart(line),
2527 0, 0, nullptr, line);
2528 NotifyModified(mh);
2531 void Document::MarginSetStyle(Sci::Line line, int style) {
2532 Margins()->SetStyle(line, style);
2533 NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line),
2534 0, 0, nullptr, line));
2537 void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2538 Margins()->SetStyles(line, styles);
2539 NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line),
2540 0, 0, nullptr, line));
2543 void Document::MarginClearAll() {
2544 const Sci::Line maxEditorLine = LinesTotal();
2545 for (Sci::Line l=0; l<maxEditorLine; l++)
2546 MarginSetText(l, nullptr);
2547 // Free remaining data
2548 Margins()->ClearAll();
2551 StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept {
2552 const LineAnnotation *pla = Annotations();
2553 return StyledText(pla->Length(line), pla->Text(line),
2554 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2557 void Document::AnnotationSetText(Sci::Line line, const char *text) {
2558 if (line >= 0 && line < LinesTotal()) {
2559 const Sci::Line linesBefore = AnnotationLines(line);
2560 Annotations()->SetText(line, text);
2561 const int linesAfter = AnnotationLines(line);
2562 DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2563 0, 0, nullptr, line);
2564 mh.annotationLinesAdded = linesAfter - linesBefore;
2565 NotifyModified(mh);
2569 void Document::AnnotationSetStyle(Sci::Line line, int style) {
2570 if (line >= 0 && line < LinesTotal()) {
2571 Annotations()->SetStyle(line, style);
2572 const DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2573 0, 0, nullptr, line);
2574 NotifyModified(mh);
2578 void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2579 if (line >= 0 && line < LinesTotal()) {
2580 Annotations()->SetStyles(line, styles);
2584 int Document::AnnotationLines(Sci::Line line) const noexcept {
2585 return Annotations()->Lines(line);
2588 void Document::AnnotationClearAll() {
2589 if (Annotations()->Empty()) {
2590 return;
2592 const Sci::Line maxEditorLine = LinesTotal();
2593 for (Sci::Line l=0; l<maxEditorLine; l++)
2594 AnnotationSetText(l, nullptr);
2595 // Free remaining data
2596 Annotations()->ClearAll();
2599 StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept {
2600 const LineAnnotation *pla = EOLAnnotations();
2601 return StyledText(pla->Length(line), pla->Text(line),
2602 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2605 void Document::EOLAnnotationSetText(Sci::Line line, const char *text) {
2606 if (line >= 0 && line < LinesTotal()) {
2607 EOLAnnotations()->SetText(line, text);
2608 const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2609 0, 0, nullptr, line);
2610 NotifyModified(mh);
2614 void Document::EOLAnnotationSetStyle(Sci::Line line, int style) {
2615 if (line >= 0 && line < LinesTotal()) {
2616 EOLAnnotations()->SetStyle(line, style);
2617 const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2618 0, 0, nullptr, line);
2619 NotifyModified(mh);
2623 void Document::EOLAnnotationClearAll() {
2624 if (EOLAnnotations()->Empty()) {
2625 return;
2627 const Sci::Line maxEditorLine = LinesTotal();
2628 for (Sci::Line l=0; l<maxEditorLine; l++)
2629 EOLAnnotationSetText(l, nullptr);
2630 // Free remaining data
2631 EOLAnnotations()->ClearAll();
2634 void Document::IncrementStyleClock() noexcept {
2635 styleClock = (styleClock + 1) % 0x100000;
2638 void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
2639 decorations->SetCurrentIndicator(indicator);
2642 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2643 const FillResult<Sci::Position> fr = decorations->FillRange(
2644 position, value, fillLength);
2645 if (fr.changed) {
2646 const DocModification mh(ModificationFlags::ChangeIndicator | ModificationFlags::User,
2647 fr.position, fr.fillLength);
2648 NotifyModified(mh);
2652 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2653 const WatcherWithUserData wwud(watcher, userData);
2654 std::vector<WatcherWithUserData>::iterator it =
2655 std::find(watchers.begin(), watchers.end(), wwud);
2656 if (it != watchers.end())
2657 return false;
2658 watchers.push_back(wwud);
2659 return true;
2662 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) noexcept {
2663 try {
2664 // This can never fail as WatcherWithUserData constructor and == are noexcept
2665 // but std::find is not noexcept.
2666 std::vector<WatcherWithUserData>::iterator it =
2667 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2668 if (it != watchers.end()) {
2669 watchers.erase(it);
2670 return true;
2672 } catch (...) {
2673 // Ignore any exception
2675 return false;
2678 void Document::NotifyModifyAttempt() {
2679 for (const WatcherWithUserData &watcher : watchers) {
2680 watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2684 void Document::NotifySavePoint(bool atSavePoint) {
2685 for (const WatcherWithUserData &watcher : watchers) {
2686 watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2690 void Document::NotifyModified(DocModification mh) {
2691 if (FlagSet(mh.modificationType, ModificationFlags::InsertText)) {
2692 decorations->InsertSpace(mh.position, mh.length);
2693 } else if (FlagSet(mh.modificationType, ModificationFlags::DeleteText)) {
2694 decorations->DeleteRange(mh.position, mh.length);
2696 for (const WatcherWithUserData &watcher : watchers) {
2697 watcher.watcher->NotifyModified(this, mh, watcher.userData);
2701 bool Document::IsWordPartSeparator(unsigned int ch) const {
2702 return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch);
2705 Sci::Position Document::WordPartLeft(Sci::Position pos) const {
2706 if (pos > 0) {
2707 pos -= CharacterBefore(pos).widthBytes;
2708 CharacterExtracted ceStart = CharacterAfter(pos);
2709 if (IsWordPartSeparator(ceStart.character)) {
2710 while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) {
2711 pos -= CharacterBefore(pos).widthBytes;
2714 if (pos > 0) {
2715 ceStart = CharacterAfter(pos);
2716 pos -= CharacterBefore(pos).widthBytes;
2717 if (IsLowerCase(ceStart.character)) {
2718 while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
2719 pos -= CharacterBefore(pos).widthBytes;
2720 if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
2721 pos += CharacterAfter(pos).widthBytes;
2722 } else if (IsUpperCase(ceStart.character)) {
2723 while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
2724 pos -= CharacterBefore(pos).widthBytes;
2725 if (!IsUpperCase(CharacterAfter(pos).character))
2726 pos += CharacterAfter(pos).widthBytes;
2727 } else if (IsADigit(ceStart.character)) {
2728 while (pos > 0 && IsADigit(CharacterAfter(pos).character))
2729 pos -= CharacterBefore(pos).widthBytes;
2730 if (!IsADigit(CharacterAfter(pos).character))
2731 pos += CharacterAfter(pos).widthBytes;
2732 } else if (IsPunctuation(ceStart.character)) {
2733 while (pos > 0 && IsPunctuation(CharacterAfter(pos).character))
2734 pos -= CharacterBefore(pos).widthBytes;
2735 if (!IsPunctuation(CharacterAfter(pos).character))
2736 pos += CharacterAfter(pos).widthBytes;
2737 } else if (IsASpace(ceStart.character)) {
2738 while (pos > 0 && IsASpace(CharacterAfter(pos).character))
2739 pos -= CharacterBefore(pos).widthBytes;
2740 if (!IsASpace(CharacterAfter(pos).character))
2741 pos += CharacterAfter(pos).widthBytes;
2742 } else if (!IsASCII(ceStart.character)) {
2743 while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
2744 pos -= CharacterBefore(pos).widthBytes;
2745 if (IsASCII(CharacterAfter(pos).character))
2746 pos += CharacterAfter(pos).widthBytes;
2747 } else {
2748 pos += CharacterAfter(pos).widthBytes;
2752 return pos;
2755 Sci::Position Document::WordPartRight(Sci::Position pos) const {
2756 CharacterExtracted ceStart = CharacterAfter(pos);
2757 const Sci::Position length = LengthNoExcept();
2758 if (IsWordPartSeparator(ceStart.character)) {
2759 while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
2760 pos += CharacterAfter(pos).widthBytes;
2761 ceStart = CharacterAfter(pos);
2763 if (!IsASCII(ceStart.character)) {
2764 while (pos < length && !IsASCII(CharacterAfter(pos).character))
2765 pos += CharacterAfter(pos).widthBytes;
2766 } else if (IsLowerCase(ceStart.character)) {
2767 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2768 pos += CharacterAfter(pos).widthBytes;
2769 } else if (IsUpperCase(ceStart.character)) {
2770 if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
2771 pos += CharacterAfter(pos).widthBytes;
2772 while (pos < length && IsLowerCase(CharacterAfter(pos).character))
2773 pos += CharacterAfter(pos).widthBytes;
2774 } else {
2775 while (pos < length && IsUpperCase(CharacterAfter(pos).character))
2776 pos += CharacterAfter(pos).widthBytes;
2778 if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
2779 pos -= CharacterBefore(pos).widthBytes;
2780 } else if (IsADigit(ceStart.character)) {
2781 while (pos < length && IsADigit(CharacterAfter(pos).character))
2782 pos += CharacterAfter(pos).widthBytes;
2783 } else if (IsPunctuation(ceStart.character)) {
2784 while (pos < length && IsPunctuation(CharacterAfter(pos).character))
2785 pos += CharacterAfter(pos).widthBytes;
2786 } else if (IsASpace(ceStart.character)) {
2787 while (pos < length && IsASpace(CharacterAfter(pos).character))
2788 pos += CharacterAfter(pos).widthBytes;
2789 } else {
2790 pos += CharacterAfter(pos).widthBytes;
2792 return pos;
2795 Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept {
2796 const char sStart = cb.StyleAt(pos);
2797 if (delta < 0) {
2798 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
2799 pos--;
2800 pos++;
2801 } else {
2802 while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
2803 pos++;
2805 return pos;
/**
 * Return the brace that pairs with @a ch among (), [], {} and <>,
 * or '\0' when @a ch is not one of those characters.
 */
static char BraceOpposite(char ch) noexcept {
	// Partners sit next to each other, so flipping the lowest index bit selects the other one.
	constexpr std::string_view bracePairs = "()[]{}<>";
	const size_t index = bracePairs.find(ch);
	if (index == std::string_view::npos) {
		return '\0';
	}
	return bracePairs[index ^ 1];
}
2831 // TODO: should be able to extend styled region to find matching brace
2832 Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept {
2833 const char chBrace = CharAt(position);
2834 const char chSeek = BraceOpposite(chBrace);
2835 if (chSeek == '\0')
2836 return - 1;
2837 const int styBrace = StyleIndexAt(position);
2838 int direction = -1;
2839 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2840 direction = 1;
2841 int depth = 1;
2842 position = useStartPos ? startPos : NextPosition(position, direction);
2843 while ((position >= 0) && (position < LengthNoExcept())) {
2844 const char chAtPos = CharAt(position);
2845 const int styAtPos = StyleIndexAt(position);
2846 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2847 if (chAtPos == chBrace)
2848 depth++;
2849 if (chAtPos == chSeek)
2850 depth--;
2851 if (depth == 0)
2852 return position;
2854 const Sci::Position positionBeforeMove = position;
2855 position = NextPosition(position, direction);
2856 if (position == positionBeforeMove)
2857 break;
2859 return - 1;
2863 * Implementation of RegexSearchBase for the default built-in regular expression engine
2865 class BuiltinRegex : public RegexSearchBase {
2866 public:
2867 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2869 Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
2870 bool caseSensitive, bool word, bool wordStart, FindOption flags,
2871 Sci::Position *length) override;
2873 const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;
2875 private:
2876 RESearch search;
2877 std::string substituted;
2880 namespace {
2883 * RESearchRange keeps track of search range.
2885 class RESearchRange {
2886 public:
2887 const Document *doc;
2888 int increment;
2889 Sci::Position startPos;
2890 Sci::Position endPos;
2891 Sci::Line lineRangeStart;
2892 Sci::Line lineRangeEnd;
2893 Sci::Line lineRangeBreak;
2894 RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) {
2895 increment = (minPos <= maxPos) ? 1 : -1;
2897 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2898 // but just in case, move them.
2899 startPos = doc->MovePositionOutsideChar(minPos, 1, true);
2900 endPos = doc->MovePositionOutsideChar(maxPos, 1, true);
2902 lineRangeStart = doc->SciLineFromPosition(startPos);
2903 lineRangeEnd = doc->SciLineFromPosition(endPos);
2904 lineRangeBreak = lineRangeEnd + increment;
2906 Range LineRange(Sci::Line line, Sci::Position lineStartPos, Sci::Position lineEndPos) const noexcept {
2907 Range range(lineStartPos, lineEndPos);
2908 if (increment == 1) {
2909 if (line == lineRangeStart)
2910 range.start = startPos;
2911 if (line == lineRangeEnd)
2912 range.end = endPos;
2913 } else {
2914 if (line == lineRangeEnd)
2915 range.start = endPos;
2916 if (line == lineRangeStart)
2917 range.end = startPos;
2919 return range;
2923 // Define a way for the Regular Expression code to access the document
2924 class DocumentIndexer final : public CharacterIndexer {
2925 Document *pdoc;
2926 Sci::Position end;
2927 public:
2928 DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept :
2929 pdoc(pdoc_), end(end_) {
2932 char CharAt(Sci::Position index) const noexcept override {
2933 if (index < 0 || index >= end)
2934 return 0;
2935 else
2936 return pdoc->CharAt(index);
2938 Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept override {
2939 return pdoc->MovePositionOutsideChar(pos, moveDir, false);
2943 #ifndef NO_CXX11_REGEX
2945 class ByteIterator {
2946 public:
2947 using iterator_category = std::bidirectional_iterator_tag;
2948 using value_type = char;
2949 using difference_type = ptrdiff_t;
2950 using pointer = char*;
2951 using reference = char&;
2953 const Document *doc;
2954 Sci::Position position;
2956 explicit ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2957 doc(doc_), position(position_) {
2959 char operator*() const noexcept {
2960 return doc->CharAt(position);
2962 ByteIterator &operator++() noexcept {
2963 position++;
2964 return *this;
2966 ByteIterator operator++(int) noexcept {
2967 ByteIterator retVal(*this);
2968 position++;
2969 return retVal;
2971 ByteIterator &operator--() noexcept {
2972 position--;
2973 return *this;
2975 bool operator==(const ByteIterator &other) const noexcept {
2976 return doc == other.doc && position == other.position;
2978 bool operator!=(const ByteIterator &other) const noexcept {
2979 return doc != other.doc || position != other.position;
2981 Sci::Position Pos() const noexcept {
2982 return position;
2984 Sci::Position PosRoundUp() const noexcept {
2985 return position;
// wchar_t is 16 bits wide on Windows and 32 bits wide on Unix.
// Differentiating with sizeof(wchar_t) or similar would be better,
// but for now it is easier to hard-code the platforms.
// C++11 has char16_t and char32_t but neither Clang nor Visual C++
// appear to allow specializing basic_regex over these.

#if defined(_WIN32)
#define WCHAR_T_IS_16 1
#else
#define WCHAR_T_IS_16 0
#endif
3001 #if WCHAR_T_IS_16
3003 // On Windows, report non-BMP characters as 2 separate surrogates as that
3004 // matches wregex since it is based on wchar_t.
3005 class UTF8Iterator {
3006 // These 3 fields determine the iterator position and are used for comparisons
3007 const Document *doc;
3008 Sci::Position position;
3009 size_t characterIndex;
3010 // Remaining fields are derived from the determining fields so are excluded in comparisons
3011 unsigned int lenBytes;
3012 size_t lenCharacters;
3013 wchar_t buffered[2];
3014 public:
3015 using iterator_category = std::bidirectional_iterator_tag;
3016 using value_type = wchar_t;
3017 using difference_type = ptrdiff_t;
3018 using pointer = wchar_t*;
3019 using reference = wchar_t&;
3021 explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
3022 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
3023 buffered[0] = 0;
3024 buffered[1] = 0;
3025 if (doc) {
3026 ReadCharacter();
3029 wchar_t operator*() const noexcept {
3030 assert(lenCharacters != 0);
3031 return buffered[characterIndex];
3033 UTF8Iterator &operator++() noexcept {
3034 if ((characterIndex + 1) < (lenCharacters)) {
3035 characterIndex++;
3036 } else {
3037 position += lenBytes;
3038 ReadCharacter();
3039 characterIndex = 0;
3041 return *this;
3043 UTF8Iterator operator++(int) noexcept {
3044 UTF8Iterator retVal(*this);
3045 if ((characterIndex + 1) < (lenCharacters)) {
3046 characterIndex++;
3047 } else {
3048 position += lenBytes;
3049 ReadCharacter();
3050 characterIndex = 0;
3052 return retVal;
3054 UTF8Iterator &operator--() noexcept {
3055 if (characterIndex) {
3056 characterIndex--;
3057 } else {
3058 position = doc->NextPosition(position, -1);
3059 ReadCharacter();
3060 characterIndex = lenCharacters - 1;
3062 return *this;
3064 bool operator==(const UTF8Iterator &other) const noexcept {
3065 // Only test the determining fields, not the character widths and values derived from this
3066 return doc == other.doc &&
3067 position == other.position &&
3068 characterIndex == other.characterIndex;
3070 bool operator!=(const UTF8Iterator &other) const noexcept {
3071 // Only test the determining fields, not the character widths and values derived from this
3072 return doc != other.doc ||
3073 position != other.position ||
3074 characterIndex != other.characterIndex;
3076 Sci::Position Pos() const noexcept {
3077 return position;
3079 Sci::Position PosRoundUp() const noexcept {
3080 if (characterIndex)
3081 return position + lenBytes; // Force to end of character
3082 else
3083 return position;
3085 private:
3086 void ReadCharacter() noexcept {
3087 const CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3088 lenBytes = charExtracted.widthBytes;
3089 if (charExtracted.character == unicodeReplacementChar) {
3090 lenCharacters = 1;
3091 buffered[0] = static_cast<wchar_t>(charExtracted.character);
3092 } else {
3093 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
3098 #else
3100 // On Unix, report non-BMP characters as single characters
3102 class UTF8Iterator {
3103 const Document *doc;
3104 Sci::Position position;
3105 public:
3106 using iterator_category = std::bidirectional_iterator_tag;
3107 using value_type = wchar_t;
3108 using difference_type = ptrdiff_t;
3109 using pointer = wchar_t*;
3110 using reference = wchar_t&;
3112 explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
3113 doc(doc_), position(position_) {
3115 wchar_t operator*() const noexcept {
3116 const CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3117 return charExtracted.character;
3119 UTF8Iterator &operator++() noexcept {
3120 position = doc->NextPosition(position, 1);
3121 return *this;
3123 UTF8Iterator operator++(int) noexcept {
3124 UTF8Iterator retVal(*this);
3125 position = doc->NextPosition(position, 1);
3126 return retVal;
3128 UTF8Iterator &operator--() noexcept {
3129 position = doc->NextPosition(position, -1);
3130 return *this;
3132 bool operator==(const UTF8Iterator &other) const noexcept {
3133 return doc == other.doc && position == other.position;
3135 bool operator!=(const UTF8Iterator &other) const noexcept {
3136 return doc != other.doc || position != other.position;
3138 Sci::Position Pos() const noexcept {
3139 return position;
3141 Sci::Position PosRoundUp() const noexcept {
3142 return position;
3146 #endif
3148 std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos, Sci::Position lineStartPos, Sci::Position lineEndPos) {
3149 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
3150 if (startPos != lineStartPos) {
3151 #ifdef _LIBCPP_VERSION
3152 flagsMatch |= std::regex_constants::match_not_bol;
3153 if (!doc->IsWordStartAt(startPos)) {
3154 flagsMatch |= std::regex_constants::match_not_bow;
3156 #else
3157 flagsMatch |= std::regex_constants::match_prev_avail;
3158 #endif
3160 if (endPos != lineEndPos) {
3161 flagsMatch |= std::regex_constants::match_not_eol;
3162 if (!doc->IsWordEndAt(endPos)) {
3163 flagsMatch |= std::regex_constants::match_not_eow;
3166 return flagsMatch;
3169 template<typename Iterator, typename Regex>
3170 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
3171 std::match_results<Iterator> match;
3173 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3174 // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3175 // The std::regex::multiline option was added to C++17 to improve behaviour but
3176 // has not been implemented by compiler runtimes with MSVC always in multiline
3177 // mode and libc++ and libstdc++ always in single-line mode.
3178 // If multiline regex worked well then the line by line iteration could be removed
3179 // for the forwards case and replaced with the following:
3180 #ifdef REGEX_MULTILINE
3181 const Sci::Position lineStartPos = doc->LineStart(resr.lineRangeStart);
3182 const Sci::Position lineEndPos = doc->LineEnd(resr.lineRangeEnd);
3183 Iterator itStart(doc, resr.startPos);
3184 Iterator itEnd(doc, resr.endPos);
3185 const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos, lineStartPos, lineEndPos);
3186 const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3187 #else
3188 // Line by line.
3189 bool matched = false;
3190 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3191 const Sci::Position lineStartPos = doc->LineStart(line);
3192 const Sci::Position lineEndPos = doc->LineEnd(line);
3193 const Range lineRange = resr.LineRange(line, lineStartPos, lineEndPos);
3194 Iterator itStart(doc, lineRange.start);
3195 Iterator itEnd(doc, lineRange.end);
3196 const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end, lineStartPos, lineEndPos);
3197 std::regex_iterator<Iterator> it(itStart, itEnd, regexp, flagsMatch);
3198 for (const std::regex_iterator<Iterator> last; it != last; ++it) {
3199 match = *it;
3200 matched = true;
3201 if (resr.increment > 0) {
3202 break;
3205 if (matched) {
3206 break;
3209 #endif
3210 if (matched) {
3211 for (size_t co = 0; co < match.size() && co < RESearch::MAXTAG; co++) {
3212 search.bopat[co] = match[co].first.Pos();
3213 search.eopat[co] = match[co].second.PosRoundUp();
3216 return matched;
3219 Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3220 bool caseSensitive, Sci::Position *length, RESearch &search) {
3221 const RESearchRange resr(doc, minPos, maxPos);
3222 try {
3223 //ElapsedPeriod ep;
3224 std::regex::flag_type flagsRe = std::regex::ECMAScript;
3225 // Flags that appear to have no effect:
3226 // | std::regex::collate | std::regex::extended;
3227 if (!caseSensitive)
3228 flagsRe = flagsRe | std::regex::icase;
3230 #if defined(REGEX_MULTILINE) && !defined(_MSC_VER)
3231 flagsRe = flagsRe | std::regex::multiline;
3232 #endif
3234 // Clear the RESearch so can fill in matches
3235 search.Clear();
3237 bool matched = false;
3238 if (CpUtf8 == doc->dbcsCodePage) {
3239 const std::wstring ws = WStringFromUTF8(s);
3240 std::wregex regexp;
3241 regexp.assign(ws, flagsRe);
3242 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3243 } else {
3244 std::regex regexp;
3245 regexp.assign(s, flagsRe);
3246 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3249 Sci::Position posMatch = -1;
3250 if (matched) {
3251 posMatch = search.bopat[0];
3252 *length = search.eopat[0] - search.bopat[0];
3254 // Example - search in doc/ScintillaHistory.html for
3255 // [[:upper:]]eta[[:space:]]
3256 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3257 //const double durSearch = ep.Duration(true);
3258 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3259 return posMatch;
3260 } catch (std::regex_error &) {
3261 // Failed to create regular expression
3262 throw RegexError();
3263 } catch (...) {
3264 // Failed in some other way
3265 return -1;
3269 #endif
3273 Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3274 bool caseSensitive, bool, bool, FindOption flags,
3275 Sci::Position *length) {
3277 #ifndef NO_CXX11_REGEX
3278 if (FlagSet(flags, FindOption::Cxx11RegEx)) {
3279 return Cxx11RegexFindText(doc, minPos, maxPos, s,
3280 caseSensitive, length, search);
3282 #endif
3284 const RESearchRange resr(doc, minPos, maxPos);
3286 const bool posix = FlagSet(flags, FindOption::Posix);
3288 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
3289 if (errmsg) {
3290 return -1;
3292 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3293 // Replace first '.' with '-' in each property file variable reference:
3294 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3295 // Replace: $(\1-\2)
3296 Sci::Position pos = -1;
3297 Sci::Position lenRet = 0;
3298 const bool searchforLineStart = s[0] == '^';
3299 const char searchEnd = s[*length - 1];
3300 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
3301 const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
3302 for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3303 const Sci::Position lineStartPos = doc->LineStart(line);
3304 const Sci::Position lineEndPos = doc->LineEnd(line);
3305 Sci::Position startOfLine = lineStartPos;
3306 Sci::Position endOfLine = lineEndPos;
3307 if (resr.increment == 1) {
3308 if (line == resr.lineRangeStart) {
3309 if ((resr.startPos != startOfLine) && searchforLineStart)
3310 continue; // Can't match start of line if start position after start of line
3311 startOfLine = resr.startPos;
3313 if (line == resr.lineRangeEnd) {
3314 if ((resr.endPos != endOfLine) && searchforLineEnd)
3315 continue; // Can't match end of line if end position before end of line
3316 endOfLine = resr.endPos;
3318 } else {
3319 if (line == resr.lineRangeEnd) {
3320 if ((resr.endPos != startOfLine) && searchforLineStart)
3321 continue; // Can't match start of line if end position after start of line
3322 startOfLine = resr.endPos;
3324 if (line == resr.lineRangeStart) {
3325 if ((resr.startPos != endOfLine) && searchforLineEnd)
3326 continue; // Can't match end of line if start position before end of line
3327 endOfLine = resr.startPos;
3331 const DocumentIndexer di(doc, endOfLine);
3332 search.SetLineRange(lineStartPos, lineEndPos);
3333 int success = search.Execute(di, startOfLine, endOfLine);
3334 if (success) {
3335 Sci::Position endPos = search.eopat[0];
3336 // There can be only one start of a line, so no need to look for last match in line
3337 if ((resr.increment == -1) && !searchforLineStart) {
3338 // Check for the last match on this line.
3339 while (success && (endPos < endOfLine)) {
3340 const RESearch::MatchPositions bopat = search.bopat;
3341 const RESearch::MatchPositions eopat = search.eopat;
3342 pos = endPos;
3343 if (pos == bopat[0]) {
3344 // empty match
3345 pos = doc->NextPosition(pos, 1);
3347 success = search.Execute(di, pos, endOfLine);
3348 if (success) {
3349 endPos = search.eopat[0];
3350 } else {
3351 search.bopat = bopat;
3352 search.eopat = eopat;
3356 pos = search.bopat[0];
3357 lenRet = endPos - pos;
3358 break;
3361 *length = lenRet;
3362 return pos;
3365 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
3366 substituted.clear();
3367 for (Sci::Position j = 0; j < *length; j++) {
3368 if (text[j] == '\\') {
3369 const char chNext = text[++j];
3370 if (chNext >= '0' && chNext <= '9') {
3371 const unsigned int patNum = chNext - '0';
3372 const Sci::Position startPos = search.bopat[patNum];
3373 const Sci::Position len = search.eopat[patNum] - startPos;
3374 if (len > 0) { // Will be null if try for a match that did not occur
3375 const size_t size = substituted.length();
3376 substituted.resize(size + len);
3377 doc->GetCharRange(substituted.data() + size, startPos, len);
3379 } else {
3380 switch (chNext) {
3381 case 'a':
3382 substituted.push_back('\a');
3383 break;
3384 case 'b':
3385 substituted.push_back('\b');
3386 break;
3387 case 'f':
3388 substituted.push_back('\f');
3389 break;
3390 case 'n':
3391 substituted.push_back('\n');
3392 break;
3393 case 'r':
3394 substituted.push_back('\r');
3395 break;
3396 case 't':
3397 substituted.push_back('\t');
3398 break;
3399 case 'v':
3400 substituted.push_back('\v');
3401 break;
3402 case '\\':
3403 substituted.push_back('\\');
3404 break;
3405 default:
3406 substituted.push_back('\\');
3407 j--;
3410 } else {
3411 substituted.push_back(text[j]);
3414 *length = substituted.length();
3415 return substituted.c_str();
3418 #ifndef SCI_OWNREGEX
3420 RegexSearchBase *Scintilla::Internal::CreateRegexSearch(CharClassify *charClassTable) {
3421 return new BuiltinRegex(charClassTable);
3424 #endif