1 // Scintilla source code edit control
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
17 #include <string_view>
20 #include <forward_list>
26 #ifndef NO_CXX11_REGEX
30 #include "ScintillaTypes.h"
34 #include "Debugging.h"
36 #include "CharacterType.h"
37 #include "CharacterCategoryMap.h"
39 #include "SplitVector.h"
40 #include "Partitioning.h"
41 #include "RunStyles.h"
42 #include "CellBuffer.h"
44 #include "CharClassify.h"
45 #include "Decoration.h"
46 #include "CaseFolder.h"
49 #include "UniConversion.h"
50 #include "ElapsedPeriod.h"
52 using namespace Scintilla
;
53 using namespace Scintilla::Internal
;
55 #if defined(__GNUC__) && !defined(__clang__)
56 // False warnings from g++ 14.1 for UTF-8 accumulation code where UTF8MaxBytes allocated.
57 #pragma GCC diagnostic ignored "-Wstringop-overflow"
60 LexInterface::LexInterface(Document
*pdoc_
) noexcept
: pdoc(pdoc_
), performingStyle(false) {
63 LexInterface::~LexInterface() noexcept
= default;
65 void LexInterface::SetInstance(ILexer5
*instance_
) noexcept
{
66 instance
.reset(instance_
);
// Run the lexer instance over the document range from start to end.
// Lex() and then Fold() are each called with the start position, the length
// (end - start), the style found just before start, and the document.
// The work is only attempted when pdoc and instance are set and
// performingStyle is clear.
69 void LexInterface::Colourise(Sci::Position start
, Sci::Position end
) {
70 if (pdoc
&& instance
&& !performingStyle
) {
71 // Protect against reentrance, which may occur, for example, when
72 // fold points are discovered while performing styling and the folding
73 // code looks for child lines which may trigger styling.
74 performingStyle
= true;
76 const Sci::Position lengthDoc
= pdoc
->Length();
// Number of bytes handed to the lexer.
79 const Sci::Position len
= end
- start
;
// Debug checks: the length must not be negative and the range must stay
// within the document.
81 PLATFORM_ASSERT(len
>= 0);
82 PLATFORM_ASSERT(start
+ len
<= lengthDoc
);
// The lexer is told which style precedes the range: the style at start - 1.
86 styleStart
= pdoc
->StyleAt(start
- 1);
89 instance
->Lex(start
, len
, styleStart
, pdoc
);
90 instance
->Fold(start
, len
, styleStart
, pdoc
);
// Clear the reentrance guard that was set above.
93 performingStyle
= false;
97 LineEndType
LexInterface::LineEndTypesSupported() {
99 return static_cast<LineEndType
>(instance
->LineEndTypesSupported());
101 return LineEndType::Default
;
104 bool LexInterface::UseContainerLexing() const noexcept
{
108 ActionDuration::ActionDuration(double duration_
, double minDuration_
, double maxDuration_
) noexcept
:
109 duration(duration_
), minDuration(minDuration_
), maxDuration(maxDuration_
) {
// Update the smoothed per-action duration estimate from one measurement:
// numberActions actions that together took durationOfActions.
// The stored estimate is always kept within [minDuration, maxDuration].
112 void ActionDuration::AddSample(size_t numberActions
, double durationOfActions
) noexcept
{
113 // Only adjust for multiple actions to avoid instability
// The threshold for "multiple" is 8 actions.
114 if (numberActions
< 8)
117 // Alpha value for exponential smoothing.
118 // Most recent value contributes 25% to smoothed value.
119 constexpr double alpha
= 0.25;
// Average cost of a single action within this sample.
121 const double durationOne
= durationOfActions
/ numberActions
;
// Blend the sample with the previous estimate, then clamp the result to the
// configured bounds.
122 duration
= std::clamp(alpha
* durationOne
+ (1.0 - alpha
) * duration
,
123 minDuration
, maxDuration
);
126 double ActionDuration::Duration() const noexcept
{
// Estimate how many actions fit into secondsAllowed: the allowed time divided
// by Duration(), rounded to the nearest integer by std::lround and converted
// to size_t on return.
// NOTE(review): this divides by Duration(); presumably minDuration keeps the
// estimate above zero - confirm.
130 size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed
) const noexcept
{
131 return std::lround(secondsAllowed
/ Duration());
// Build a CharacterExtracted from a candidate UTF-8 sequence.
// charBytes points at the bytes and widthCharBytes says how many were supplied.
// UTF8Classify decides validity: invalid input yields unicodeReplacementChar,
// valid input is decoded with UnicodeFromUTF8 and its width is taken from the
// UTF8MaskWidth bits of the classification result.
134 CharacterExtracted::CharacterExtracted(const unsigned char *charBytes
, size_t widthCharBytes
) noexcept
{
135 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
136 if (utf8status
& UTF8MaskInvalid
) {
137 // Treat as invalid and use up just one byte
138 character
= unicodeReplacementChar
;
// Valid sequence: decode the code point and record the width in bytes.
141 character
= UnicodeFromUTF8(charBytes
);
142 widthBytes
= utf8status
& UTF8MaskWidth
;
146 Document::Document(DocumentOption options
) :
147 cb(!FlagSet(options
, DocumentOption::StylesNone
), FlagSet(options
, DocumentOption::TextLarge
)),
148 durationStyleOneByte(0.000001, 0.0000001, 0.00001) {
151 eolMode
= EndOfLine::CrLf
;
153 eolMode
= EndOfLine::Lf
;
155 dbcsCodePage
= CpUtf8
;
156 lineEndBitSet
= LineEndType::Default
;
159 enteredModification
= 0;
161 enteredReadOnlyCount
= 0;
162 insertionSet
= false;
165 actualIndentInChars
= 8;
168 backspaceUnindents
= false;
170 matchesValid
= false;
172 perLineData
[ldMarkers
] = std::make_unique
<LineMarkers
>();
173 perLineData
[ldLevels
] = std::make_unique
<LineLevels
>();
174 perLineData
[ldState
] = std::make_unique
<LineState
>();
175 perLineData
[ldMargin
] = std::make_unique
<LineAnnotation
>();
176 perLineData
[ldAnnotation
] = std::make_unique
<LineAnnotation
>();
177 perLineData
[ldEOLAnnotation
] = std::make_unique
<LineAnnotation
>();
179 decorations
= DecorationListCreate(IsLarge());
182 cb
.SetUTF8Substance(CpUtf8
== dbcsCodePage
);
185 Document::~Document() {
186 for (const WatcherWithUserData
&watcher
: watchers
) {
187 watcher
.watcher
->NotifyDeleted(this, watcher
.userData
);
191 // Increase reference count and return its previous value.
192 int SCI_METHOD
Document::AddRef() noexcept
{
196 // Decrease reference count.
// NOTE(review): this comment used to say the previous value is returned, but
// curRefCount below holds the count after the pre-decrement - confirm which
// value callers are meant to receive.
197 // Delete the document if reference count reaches zero.
198 int SCI_METHOD
Document::Release() {
// Pre-decrement: curRefCount is the count that remains after this release.
199 const int curRefCount
= --refCount
;
200 if (curRefCount
== 0)
205 void Document::Init() {
206 for (const std::unique_ptr
<PerLine
> &pl
: perLineData
) {
212 void Document::InsertLine(Sci::Line line
) {
213 for (const std::unique_ptr
<PerLine
> &pl
: perLineData
) {
215 pl
->InsertLine(line
);
219 void Document::InsertLines(Sci::Line line
, Sci::Line lines
) {
220 for (const auto &pl
: perLineData
) {
222 pl
->InsertLines(line
, lines
);
226 void Document::RemoveLine(Sci::Line line
) {
227 for (const std::unique_ptr
<PerLine
> &pl
: perLineData
) {
229 pl
->RemoveLine(line
);
233 LineMarkers
*Document::Markers() const noexcept
{
234 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
].get());
237 LineLevels
*Document::Levels() const noexcept
{
238 return static_cast<LineLevels
*>(perLineData
[ldLevels
].get());
241 LineState
*Document::States() const noexcept
{
242 return static_cast<LineState
*>(perLineData
[ldState
].get());
245 LineAnnotation
*Document::Margins() const noexcept
{
246 return static_cast<LineAnnotation
*>(perLineData
[ldMargin
].get());
249 LineAnnotation
*Document::Annotations() const noexcept
{
250 return static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
].get());
253 LineAnnotation
*Document::EOLAnnotations() const noexcept
{
254 return static_cast<LineAnnotation
*>(perLineData
[ldEOLAnnotation
].get());
257 LineEndType
Document::LineEndTypesSupported() const {
258 if ((CpUtf8
== dbcsCodePage
) && pli
)
259 return pli
->LineEndTypesSupported();
261 return LineEndType::Default
;
264 bool Document::SetDBCSCodePage(int dbcsCodePage_
) {
265 if (dbcsCodePage
!= dbcsCodePage_
) {
266 dbcsCodePage
= dbcsCodePage_
;
267 SetCaseFolder(nullptr);
268 cb
.SetLineEndTypes(lineEndBitSet
& LineEndTypesSupported());
269 cb
.SetUTF8Substance(CpUtf8
== dbcsCodePage
);
270 ModifiedAt(0); // Need to restyle whole document
277 bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_
) {
278 if (lineEndBitSet
!= lineEndBitSet_
) {
279 lineEndBitSet
= lineEndBitSet_
;
280 const LineEndType lineEndBitSetActive
= lineEndBitSet
& LineEndTypesSupported();
281 if (lineEndBitSetActive
!= cb
.GetLineEndTypes()) {
283 cb
.SetLineEndTypes(lineEndBitSetActive
);
293 void Document::SetSavePoint() {
295 NotifySavePoint(true);
298 void Document::TentativeUndo() {
299 if (!TentativeActive())
302 if (enteredModification
== 0) {
303 enteredModification
++;
304 if (!cb
.IsReadOnly()) {
305 const bool startSavePoint
= cb
.IsSavePoint();
306 bool multiLine
= false;
307 const int steps
= cb
.TentativeSteps();
308 //Platform::DebugPrintf("Steps=%d\n", steps);
309 for (int step
= 0; step
< steps
; step
++) {
310 const Sci::Line prevLinesTotal
= LinesTotal();
311 const Action action
= cb
.GetUndoStep();
312 if (action
.at
== ActionType::remove
) {
313 NotifyModified(DocModification(
314 ModificationFlags::BeforeInsert
| ModificationFlags::Undo
, action
));
315 } else if (action
.at
== ActionType::container
) {
316 DocModification
dm(ModificationFlags::Container
| ModificationFlags::Undo
);
317 dm
.token
= action
.position
;
320 NotifyModified(DocModification(
321 ModificationFlags::BeforeDelete
| ModificationFlags::Undo
, action
));
323 cb
.PerformUndoStep();
324 if (action
.at
!= ActionType::container
) {
325 ModifiedAt(action
.position
);
328 ModificationFlags modFlags
= ModificationFlags::Undo
;
329 // With undo, an insertion action becomes a deletion notification
330 if (action
.at
== ActionType::remove
) {
331 modFlags
|= ModificationFlags::InsertText
;
332 } else if (action
.at
== ActionType::insert
) {
333 modFlags
|= ModificationFlags::DeleteText
;
336 modFlags
|= ModificationFlags::MultiStepUndoRedo
;
337 const Sci::Line linesAdded
= LinesTotal() - prevLinesTotal
;
340 if (step
== steps
- 1) {
341 modFlags
|= ModificationFlags::LastStepInUndoRedo
;
343 modFlags
|= ModificationFlags::MultilineUndoRedo
;
345 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
346 linesAdded
, action
.data
));
349 const bool endSavePoint
= cb
.IsSavePoint();
350 if (startSavePoint
!= endSavePoint
)
351 NotifySavePoint(endSavePoint
);
353 cb
.TentativeCommit();
355 enteredModification
--;
359 int Document::UndoActions() const noexcept
{
360 return cb
.UndoActions();
363 void Document::SetUndoSavePoint(int action
) noexcept
{
364 cb
.SetUndoSavePoint(action
);
367 int Document::UndoSavePoint() const noexcept
{
368 return cb
.UndoSavePoint();
371 void Document::SetUndoDetach(int action
) noexcept
{
372 cb
.SetUndoDetach(action
);
375 int Document::UndoDetach() const noexcept
{
376 return cb
.UndoDetach();
379 void Document::SetUndoTentative(int action
) noexcept
{
380 cb
.SetUndoTentative(action
);
383 int Document::UndoTentative() const noexcept
{
384 return cb
.UndoTentative();
387 void Document::SetUndoCurrent(int action
) {
388 cb
.SetUndoCurrent(action
);
391 int Document::UndoCurrent() const noexcept
{
392 return cb
.UndoCurrent();
395 int Document::UndoActionType(int action
) const noexcept
{
396 return cb
.UndoActionType(action
);
399 Sci::Position
Document::UndoActionPosition(int action
) const noexcept
{
400 return cb
.UndoActionPosition(action
);
403 std::string_view
Document::UndoActionText(int action
) const noexcept
{
404 return cb
.UndoActionText(action
);
407 void Document::PushUndoActionType(int type
, Sci::Position position
) {
408 cb
.PushUndoActionType(type
, position
);
411 void Document::ChangeLastUndoActionText(size_t length
, const char *text
) {
412 cb
.ChangeLastUndoActionText(length
, text
);
// Return the marker bits for line: Markers()->MarkValue(line), combined with
// change history bits when includeChangeHistory is set and line is below
// LinesTotal().
415 int Document::GetMark(Sci::Line line
, bool includeChangeHistory
) const {
// Stays 0 unless change history is requested and line is in range.
416 int marksHistory
= 0;
417 if (includeChangeHistory
&& (line
< LinesTotal())) {
418 int marksEdition
= 0;
// First pass: walk the edition runs from the start of the line up to the
// start of the next line.
420 const Sci::Position start
= LineStart(line
);
421 const Sci::Position lineNext
= LineStart(line
+ 1);
422 for (Sci::Position position
= start
; position
< lineNext
;) {
423 const int edition
= EditionAt(position
);
// Edition n is recorded as bit n-1 of marksEdition.
425 marksEdition
|= 1 << (edition
-1);
// Continue from the end of the current run.
427 position
= EditionEndRun(position
);
// Second pass: deletions are checked at every reported position up to and
// including the line end.
429 const Sci::Position lineEnd
= LineEnd(line
);
430 for (Sci::Position position
= start
; position
<= lineEnd
;) {
431 marksEdition
|= EditionDeletesAt(position
);
432 position
= EditionNextDelete(position
);
435 /* Bits: RevertedToOrigin, Saved, Modified, RevertedToModified */
// Shift the edition bits so the lowest one lands on the
// HistoryRevertedToOrigin marker number.
436 constexpr unsigned int editionShift
= static_cast<unsigned int>(MarkerOutline::HistoryRevertedToOrigin
);
437 marksHistory
= marksEdition
<< editionShift
;
440 return marksHistory
| Markers()->MarkValue(line
);
443 Sci::Line
Document::MarkerNext(Sci::Line lineStart
, int mask
) const noexcept
{
444 return Markers()->MarkerNext(lineStart
, mask
);
447 int Document::AddMark(Sci::Line line
, int markerNum
) {
448 if (line
>= 0 && line
< LinesTotal()) {
449 const int prev
= Markers()->AddMark(line
, markerNum
, LinesTotal());
450 const DocModification
mh(ModificationFlags::ChangeMarker
, LineStart(line
), 0, 0, nullptr, line
);
// Add marks to line, driven by the bits of valueSet, then build a
// ChangeMarker modification record positioned at the start of the line.
458 void Document::AddMarkSet(Sci::Line line
, int valueSet
) {
// Range check: line must lie within [0, LinesTotal()).
459 if (line
< 0 || line
>= LinesTotal()) {
// m holds the bits still to be visited and i is the marker number that
// corresponds to the lowest bit of m. The loop ends when m has no bits left.
462 unsigned int m
= valueSet
;
463 for (int i
= 0; m
; i
++, m
>>= 1) {
465 Markers()->AddMark(line
, i
, LinesTotal());
467 const DocModification
mh(ModificationFlags::ChangeMarker
, LineStart(line
), 0, 0, nullptr, line
);
471 void Document::DeleteMark(Sci::Line line
, int markerNum
) {
472 Markers()->DeleteMark(line
, markerNum
, false);
473 const DocModification
mh(ModificationFlags::ChangeMarker
, LineStart(line
), 0, 0, nullptr, line
);
477 void Document::DeleteMarkFromHandle(int markerHandle
) {
478 Markers()->DeleteMarkFromHandle(markerHandle
);
479 DocModification
mh(ModificationFlags::ChangeMarker
);
484 void Document::DeleteAllMarks(int markerNum
) {
485 bool someChanges
= false;
486 for (Sci::Line line
= 0; line
< LinesTotal(); line
++) {
487 if (Markers()->DeleteMark(line
, markerNum
, true))
491 DocModification
mh(ModificationFlags::ChangeMarker
);
497 Sci::Line
Document::LineFromHandle(int markerHandle
) const noexcept
{
498 return Markers()->LineFromHandle(markerHandle
);
501 int Document::MarkerNumberFromLine(Sci::Line line
, int which
) const noexcept
{
502 return Markers()->NumberFromLine(line
, which
);
505 int Document::MarkerHandleFromLine(Sci::Line line
, int which
) const noexcept
{
506 return Markers()->HandleFromLine(line
, which
);
509 Sci_Position SCI_METHOD
Document::LineStart(Sci_Position line
) const {
510 return cb
.LineStart(line
);
// Return the range of positions covered by line: from the start of line up to
// the start of line + 1.
513 Range
Document::LineRange(Sci::Line line
) const noexcept
{
514 return {cb
.LineStart(line
), cb
.LineStart(line
+ 1)};
517 bool Document::IsLineStartPosition(Sci::Position position
) const noexcept
{
518 return LineStartPosition(position
) == position
;
521 Sci_Position SCI_METHOD
Document::LineEnd(Sci_Position line
) const {
522 return cb
.LineEnd(line
);
525 int SCI_METHOD
Document::DEVersion() const noexcept
{
529 void SCI_METHOD
Document::SetErrorStatus(int status
) {
530 // Tell the watchers an error has occurred.
531 for (const WatcherWithUserData
&watcher
: watchers
) {
532 watcher
.watcher
->NotifyErrorOccurred(this, watcher
.userData
, static_cast<Status
>(status
));
536 Sci_Position SCI_METHOD
Document::LineFromPosition(Sci_Position pos
) const {
537 return cb
.LineFromPosition(pos
);
540 Sci::Line
Document::SciLineFromPosition(Sci::Position pos
) const noexcept
{
541 // Avoids casting in callers for this very common function
542 return cb
.LineFromPosition(pos
);
545 Sci::Position
Document::LineStartPosition(Sci::Position position
) const noexcept
{
546 return cb
.LineStart(cb
.LineFromPosition(position
));
549 Sci::Position
Document::LineEndPosition(Sci::Position position
) const noexcept
{
550 return cb
.LineEnd(cb
.LineFromPosition(position
));
553 bool Document::IsLineEndPosition(Sci::Position position
) const noexcept
{
554 return LineEndPosition(position
) == position
;
557 bool Document::IsPositionInLineEnd(Sci::Position position
) const noexcept
{
558 return position
>= LineEndPosition(position
);
// Home-key helper for the line containing position.
// The while loop examines leading spaces and tabs from the line start and
// never passes the line end. When position already equals startText, the
// start of the line is returned.
561 Sci::Position
Document::VCHomePosition(Sci::Position position
) const {
562 const Sci::Line line
= SciLineFromPosition(position
);
563 const Sci::Position startPosition
= LineStart(line
);
564 const Sci::Position endLine
= LineEnd(line
);
// startText begins at the line start and tracks the scan over indentation.
565 Sci::Position startText
= startPosition
;
566 while (startText
< endLine
&& IsSpaceOrTab(cb
.CharAt(startText
)))
568 if (position
== startText
)
569 return startPosition
;
574 Sci::Position
Document::IndexLineStart(Sci::Line line
, LineCharacterIndexType lineCharacterIndex
) const noexcept
{
575 return cb
.IndexLineStart(line
, lineCharacterIndex
);
578 Sci::Line
Document::LineFromPositionIndex(Sci::Position pos
, LineCharacterIndexType lineCharacterIndex
) const noexcept
{
579 return cb
.LineFromPositionIndex(pos
, lineCharacterIndex
);
// Line lookup used to step forward from line by about length bytes.
// posAfter is the start of line plus length. It is compared with the document
// length first, then the line containing it (lineAfter) is compared with
// line. The final return of lineAfter + 1 makes sure the result moves on.
582 Sci::Line
Document::LineFromPositionAfter(Sci::Line line
, Sci::Position length
) const noexcept
{
583 const Sci::Position posAfter
= cb
.LineStart(line
) + length
;
// Case: the target position is at or beyond the end of the document.
584 if (posAfter
>= LengthNoExcept()) {
587 const Sci::Line lineAfter
= SciLineFromPosition(posAfter
);
// Case: the target position is already on a later line.
588 if (lineAfter
> line
) {
591 // Want to make some progress so return next line
592 return lineAfter
+ 1;
596 int SCI_METHOD
Document::SetLevel(Sci_Position line
, int level
) {
597 const int prev
= Levels()->SetLevel(line
, level
, LinesTotal());
599 DocModification
mh(ModificationFlags::ChangeFold
| ModificationFlags::ChangeMarker
,
600 LineStart(line
), 0, 0, nullptr, line
);
601 mh
.foldLevelNow
= static_cast<FoldLevel
>(level
);
602 mh
.foldLevelPrev
= static_cast<FoldLevel
>(prev
);
608 int SCI_METHOD
Document::GetLevel(Sci_Position line
) const {
609 return Levels()->GetLevel(line
);
612 FoldLevel
Document::GetFoldLevel(Sci_Position line
) const noexcept
{
613 return Levels()->GetFoldLevel(line
);
616 void Document::ClearLevels() {
617 Levels()->ClearLevels();
// Fold helper: decide whether a line with fold level levelTry sits beneath a
// line with fold level levelStart. Whitespace levels get their own check;
// otherwise levelTry must have a strictly greater level number than levelStart.
620 static bool IsSubordinate(FoldLevel levelStart
, FoldLevel levelTry
) noexcept
{
621 if (LevelIsWhitespace(levelTry
))
624 return LevelNumber(levelStart
) < LevelNumber(levelTry
);
// Return lineMaxSubord, the furthest line found to be subordinate to the fold
// header at lineParent.
// level, when supplied, is used instead of the fold level read from lineParent.
// lastLine of -1 disables the look-ahead bound; any other value is capped at
// the final line of the document and stored in lookLastLine.
627 Sci::Line
Document::GetLastChild(Sci::Line lineParent
, std::optional
<FoldLevel
> level
, Sci::Line lastLine
) {
628 const FoldLevel levelStart
= LevelNumberPart(level
? *level
: GetFoldLevel(lineParent
));
629 const Sci::Line maxLine
= LinesTotal();
630 const Sci::Line lookLastLine
= (lastLine
!= -1) ? std::min(LinesTotal() - 1, lastLine
) : -1;
// The search starts at the parent and only ever looks at the following line.
631 Sci::Line lineMaxSubord
= lineParent
;
632 while (lineMaxSubord
< maxLine
- 1) {
// Style up to the start of line lineMaxSubord + 2 before reading the fold
// level of line lineMaxSubord + 1.
633 EnsureStyledTo(LineStart(lineMaxSubord
+ 2));
// Test: the next line is not subordinate to the block's level.
634 if (!IsSubordinate(levelStart
, GetFoldLevel(lineMaxSubord
+ 1)))
// Test: the look-ahead bound has been reached on a non-whitespace line.
636 if ((lookLastLine
!= -1) && (lineMaxSubord
>= lookLastLine
) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord
)))
640 if (lineMaxSubord
> lineParent
) {
641 if (levelStart
> LevelNumberPart(GetFoldLevel(lineMaxSubord
+ 1))) {
642 // Have chewed up some whitespace that belongs to a parent so seek back
643 if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord
))) {
648 return lineMaxSubord
;
651 Sci::Line
Document::GetFoldParent(Sci::Line line
) const noexcept
{
652 return Levels()->GetFoldParent(line
);
655 void Document::GetHighlightDelimiters(HighlightDelimiter
&highlightDelimiter
, Sci::Line line
, Sci::Line lastLine
) {
656 const FoldLevel level
= GetFoldLevel(line
);
657 const Sci::Line lookLastLine
= std::max(line
, lastLine
) + 1;
659 Sci::Line lookLine
= line
;
660 FoldLevel lookLineLevel
= level
;
661 FoldLevel lookLineLevelNum
= LevelNumberPart(lookLineLevel
);
662 while ((lookLine
> 0) && (LevelIsWhitespace(lookLineLevel
) ||
663 (LevelIsHeader(lookLineLevel
) && (lookLineLevelNum
>= LevelNumberPart(GetFoldLevel(lookLine
+ 1)))))) {
664 lookLineLevel
= GetFoldLevel(--lookLine
);
665 lookLineLevelNum
= LevelNumberPart(lookLineLevel
);
668 Sci::Line beginFoldBlock
= LevelIsHeader(lookLineLevel
) ? lookLine
: GetFoldParent(lookLine
);
669 if (beginFoldBlock
== -1) {
670 highlightDelimiter
.Clear();
674 Sci::Line endFoldBlock
= GetLastChild(beginFoldBlock
, {}, lookLastLine
);
675 Sci::Line firstChangeableLineBefore
= -1;
676 if (endFoldBlock
< line
) {
677 lookLine
= beginFoldBlock
- 1;
678 lookLineLevel
= GetFoldLevel(lookLine
);
679 lookLineLevelNum
= LevelNumberPart(lookLineLevel
);
680 while ((lookLine
>= 0) && (lookLineLevelNum
>= FoldLevel::Base
)) {
681 if (LevelIsHeader(lookLineLevel
)) {
682 if (GetLastChild(lookLine
, {}, lookLastLine
) == line
) {
683 beginFoldBlock
= lookLine
;
685 firstChangeableLineBefore
= line
- 1;
688 if ((lookLine
> 0) && (lookLineLevelNum
== FoldLevel::Base
) && (LevelNumberPart(GetFoldLevel(lookLine
- 1)) > lookLineLevelNum
))
690 lookLineLevel
= GetFoldLevel(--lookLine
);
691 lookLineLevelNum
= LevelNumberPart(lookLineLevel
);
694 if (firstChangeableLineBefore
== -1) {
695 for (lookLine
= line
- 1, lookLineLevel
= GetFoldLevel(lookLine
), lookLineLevelNum
= LevelNumberPart(lookLineLevel
);
696 lookLine
>= beginFoldBlock
;
697 lookLineLevel
= GetFoldLevel(--lookLine
), lookLineLevelNum
= LevelNumberPart(lookLineLevel
)) {
698 if (LevelIsWhitespace(lookLineLevel
) || (lookLineLevelNum
> LevelNumberPart(level
))) {
699 firstChangeableLineBefore
= lookLine
;
704 if (firstChangeableLineBefore
== -1)
705 firstChangeableLineBefore
= beginFoldBlock
- 1;
707 Sci::Line firstChangeableLineAfter
= -1;
708 for (lookLine
= line
+ 1, lookLineLevel
= GetFoldLevel(lookLine
), lookLineLevelNum
= LevelNumberPart(lookLineLevel
);
709 lookLine
<= endFoldBlock
;
710 lookLineLevel
= GetFoldLevel(++lookLine
), lookLineLevelNum
= LevelNumberPart(lookLineLevel
)) {
711 if (LevelIsHeader(lookLineLevel
) && (lookLineLevelNum
< LevelNumberPart(GetFoldLevel(lookLine
+ 1)))) {
712 firstChangeableLineAfter
= lookLine
;
716 if (firstChangeableLineAfter
== -1)
717 firstChangeableLineAfter
= endFoldBlock
+ 1;
719 highlightDelimiter
.beginFoldBlock
= beginFoldBlock
;
720 highlightDelimiter
.endFoldBlock
= endFoldBlock
;
721 highlightDelimiter
.firstChangeableLineBefore
= firstChangeableLineBefore
;
722 highlightDelimiter
.firstChangeableLineAfter
= firstChangeableLineAfter
;
// Constrain pos to the range [0, LengthNoExcept()] using std::clamp.
725 Sci::Position
Document::ClampPositionIntoDocument(Sci::Position pos
) const noexcept
{
726 return std::clamp
<Sci::Position
>(pos
, 0, LengthNoExcept());
// Test whether a carriage return at pos is immediately followed by a line feed.
// The check against LengthNoExcept() - 1 comes before the character reads,
// which touch both pos and pos + 1.
729 bool Document::IsCrLf(Sci::Position pos
) const noexcept
{
732 if (pos
>= (LengthNoExcept() - 1))
734 return (cb
.CharAt(pos
) == '\r') && (cb
.CharAt(pos
+ 1) == '\n');
// Byte length of the character that starts at pos.
// The cases are tested in this order: out-of-range positions, a CR LF pair,
// single-byte code pages or ASCII lead bytes, UTF-8 sequences (validated with
// UTF8Classify), and finally DBCS lead/trail byte pairs.
737 int Document::LenChar(Sci::Position pos
) const noexcept
{
738 if (pos
< 0 || pos
>= LengthNoExcept()) {
739 // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
741 } else if (IsCrLf(pos
)) {
745 const unsigned char leadByte
= cb
.UCharAt(pos
);
746 if (!dbcsCodePage
|| UTF8IsAscii(leadByte
)) {
747 // Common case: ASCII character
750 if (CpUtf8
== dbcsCodePage
) {
// Gather the number of bytes the lead byte announces and let UTF8Classify
// validate the sequence.
751 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
752 unsigned char charBytes
[UTF8MaxBytes
] = { leadByte
, 0, 0, 0 };
753 for (int b
= 1; b
< widthCharBytes
; b
++) {
754 charBytes
[b
] = cb
.UCharAt(pos
+ b
);
756 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
757 if (utf8status
& UTF8MaskInvalid
) {
758 // Treat as invalid and use up just one byte
// Valid sequence: the width is carried in the UTF8MaskWidth bits.
761 return utf8status
& UTF8MaskWidth
;
// DBCS code pages: check for a lead byte followed by a trail byte.
764 if (IsDBCSLeadByteNoExcept(leadByte
) && IsDBCSTrailByteNoExcept(cb
.CharAt(pos
+ 1))) {
// Examine whether pos lies inside a well-formed UTF-8 character.
// Scans backwards over trail bytes to locate a candidate lead byte and stores
// its position in start. The sequence is then validated with UTF8Classify and
// the position just past the character is stored in end.
// NOTE(review): start is assigned before the validity checks run, so it can
// hold a value on the failure paths too - confirm callers only rely on it
// after success.
772 bool Document::InGoodUTF8(Sci::Position pos
, Sci::Position
&start
, Sci::Position
&end
) const noexcept
{
773 Sci::Position trail
= pos
;
// Step back over trail bytes, looking no further than UTF8MaxBytes behind pos.
774 while ((trail
>0) && (pos
-trail
< UTF8MaxBytes
) && UTF8IsTrailByte(cb
.UCharAt(trail
-1)))
// The byte before the run of trail bytes is the candidate lead byte.
776 start
= (trail
> 0) ? trail
-1 : trail
;
778 const unsigned char leadByte
= cb
.UCharAt(start
);
779 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
// A width of 1 means the candidate does not start a multi-byte sequence.
780 if (widthCharBytes
== 1) {
783 const int trailBytes
= widthCharBytes
- 1;
784 const Sci::Position len
= pos
- start
;
785 if (len
> trailBytes
)
786 // pos too far from lead
// Copy the sequence without reading past the end of the buffer; bytes that
// are not available stay 0.
788 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
789 for (Sci::Position b
=1; b
<widthCharBytes
&& ((start
+b
) < cb
.Length()); b
++)
790 charBytes
[b
] = cb
.CharAt(start
+b
);
791 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
792 if (utf8status
& UTF8MaskInvalid
)
// end is the position just after the last byte of the character.
794 end
= start
+ widthCharBytes
;
799 // Normalise a position so that it is not part way through a multi-byte character.
800 // This can occur in two situations -
801 // When lines are terminated with \r\n pairs which should be treated as one character.
802 // When displaying DBCS text such as Japanese.
803 // If moving, move the position in the indicated direction.
804 Sci::Position
Document::MovePositionOutsideChar(Sci::Position pos
, Sci::Position moveDir
, bool checkLineEnd
) const noexcept
{
805 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
806 // If out of range, just return minimum/maximum value.
809 if (pos
>= LengthNoExcept())
810 return LengthNoExcept();
812 // PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
813 if (checkLineEnd
&& IsCrLf(pos
- 1)) {
821 if (CpUtf8
== dbcsCodePage
) {
822 const unsigned char ch
= cb
.UCharAt(pos
);
823 // If ch is not a trail byte then pos is valid intercharacter position
824 if (UTF8IsTrailByte(ch
)) {
825 Sci::Position startUTF
= pos
;
826 Sci::Position endUTF
= pos
;
827 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
828 // ch is a trail byte within a UTF-8 character
834 // Else invalid UTF-8 so return position of isolated trail byte
837 // Anchor DBCS calculations at start of line because start of line can
838 // not be a DBCS trail byte.
839 const Sci::Position posStartLine
= LineStartPosition(pos
);
840 if (pos
== posStartLine
)
843 // Step back until a non-lead-byte is found.
844 Sci::Position posCheck
= pos
;
845 while ((posCheck
> posStartLine
) && IsDBCSLeadByteNoExcept(cb
.CharAt(posCheck
-1)))
848 // Check from known start of character.
849 while (posCheck
< pos
) {
850 const int mbsize
= IsDBCSDualByteAt(posCheck
) ? 2 : 1;
851 if (posCheck
+ mbsize
== pos
) {
853 } else if (posCheck
+ mbsize
> pos
) {
855 return posCheck
+ mbsize
;
868 // NextPosition moves between valid positions - it can not handle a position in the middle of a
869 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
870 // A \r\n pair is treated as two characters.
871 Sci::Position
Document::NextPosition(Sci::Position pos
, int moveDir
) const noexcept
{
872 // If out of range, just return minimum/maximum value.
873 const int increment
= (moveDir
> 0) ? 1 : -1;
874 if (pos
+ increment
<= 0)
876 if (pos
+ increment
>= cb
.Length())
880 if (CpUtf8
== dbcsCodePage
) {
881 if (increment
== 1) {
882 // Simple forward movement case so can avoid some checks
883 const unsigned char leadByte
= cb
.UCharAt(pos
);
884 if (UTF8IsAscii(leadByte
)) {
885 // Single byte character or invalid
888 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
889 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
890 for (int b
=1; b
<widthCharBytes
; b
++)
891 charBytes
[b
] = cb
.CharAt(pos
+b
);
892 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
893 if (utf8status
& UTF8MaskInvalid
)
896 pos
+= utf8status
& UTF8MaskWidth
;
899 // Examine byte before position
901 const unsigned char ch
= cb
.UCharAt(pos
);
902 // If ch is not a trail byte then pos is valid intercharacter position
903 if (UTF8IsTrailByte(ch
)) {
904 // If ch is a trail byte in a valid UTF-8 character then return start of character
905 Sci::Position startUTF
= pos
;
906 Sci::Position endUTF
= pos
;
907 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
910 // Else invalid UTF-8 so return position of isolated trail byte
915 const int mbsize
= IsDBCSDualByteAt(pos
) ? 2 : 1;
917 if (pos
> cb
.Length())
920 // Anchor DBCS calculations at start of line because start of line can
921 // not be a DBCS trail byte.
922 const Sci::Position posStartLine
= LineStartPosition(pos
);
923 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
924 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
925 if ((pos
- 1) <= posStartLine
) {
927 } else if (IsDBCSLeadByteNoExcept(cb
.CharAt(pos
- 1))) {
928 // Should actually be trail byte
929 if (IsDBCSDualByteAt(pos
- 2)) {
932 // Invalid byte pair so treat as one byte wide
936 // Otherwise, step back until a non-lead-byte is found.
937 Sci::Position posTemp
= pos
- 1;
938 while (posStartLine
<= --posTemp
&& IsDBCSLeadByteNoExcept(cb
.CharAt(posTemp
)))
940 // Now posTemp+1 must point to the beginning of a character,
941 // so figure out whether we went back an even or an odd
942 // number of bytes and go back 1 or 2 bytes, respectively.
943 const Sci::Position widthLast
= ((pos
- posTemp
) & 1) + 1;
944 if ((widthLast
== 2) && (IsDBCSDualByteAt(pos
- widthLast
))) {
945 return pos
- widthLast
;
947 // Byte before pos may be valid character or may be an invalid second byte
959 bool Document::NextCharacter(Sci::Position
&pos
, int moveDir
) const noexcept
{
960 // Returns true if pos changed
961 Sci::Position posNext
= NextPosition(pos
, moveDir
);
962 if (posNext
== pos
) {
// Extract the character that starts at position, together with its width in
// bytes. At or past the end of the document the result is
// unicodeReplacementChar with width 0.
970 CharacterExtracted
Document::CharacterAfter(Sci::Position position
) const noexcept
{
971 if (position
>= LengthNoExcept()) {
972 return CharacterExtracted(unicodeReplacementChar
, 0);
974 const unsigned char leadByte
= cb
.UCharAt(position
);
975 if (!dbcsCodePage
|| UTF8IsAscii(leadByte
)) {
976 // Common case: ASCII character
977 return CharacterExtracted(leadByte
, 1);
979 if (CpUtf8
== dbcsCodePage
) {
// UTF-8: collect the number of bytes the lead byte announces and hand them to
// the CharacterExtracted constructor, which classifies the sequence.
980 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
981 unsigned char charBytes
[UTF8MaxBytes
] = { leadByte
, 0, 0, 0 };
982 for (int b
= 1; b
<widthCharBytes
; b
++)
983 charBytes
[b
] = cb
.UCharAt(position
+ b
);
984 return CharacterExtracted(charBytes
, widthCharBytes
);
// DBCS: a lead byte followed by a valid trail byte forms one character.
986 if (IsDBCSLeadByteNoExcept(leadByte
)) {
987 const unsigned char trailByte
= cb
.UCharAt(position
+ 1);
988 if (IsDBCSTrailByteNoExcept(trailByte
)) {
989 return CharacterExtracted::DBCS(leadByte
, trailByte
);
// Anything else is reported as a single byte.
992 return CharacterExtracted(leadByte
, 1);
// Extract the character that ends just before position, together with its
// width in bytes. Single-byte code pages and ASCII bytes are answered directly,
// UTF-8 uses InGoodUTF8 to find the start of the character, and DBCS code
// pages go through NextPosition and CharacterAfter.
996 CharacterExtracted
Document::CharacterBefore(Sci::Position position
) const noexcept
{
998 return CharacterExtracted(unicodeReplacementChar
, 0);
1000 const unsigned char previousByte
= cb
.UCharAt(position
- 1);
1001 if (0 == dbcsCodePage
) {
1002 return CharacterExtracted(previousByte
, 1);
1004 if (CpUtf8
== dbcsCodePage
) {
1005 if (UTF8IsAscii(previousByte
)) {
1006 return CharacterExtracted(previousByte
, 1);
1009 // If previousByte is not a trail byte then it is invalid
1010 if (UTF8IsTrailByte(previousByte
)) {
1011 // If previousByte is a trail byte in a valid UTF-8 character then find start of character
1012 Sci::Position startUTF
= position
;
1013 Sci::Position endUTF
= position
;
1014 if (InGoodUTF8(position
, startUTF
, endUTF
)) {
// Copy the whole character, from its lead byte up to endUTF, and decode it.
1015 const Sci::Position widthCharBytes
= endUTF
- startUTF
;
1016 unsigned char charBytes
[UTF8MaxBytes
] = { 0, 0, 0, 0 };
1017 for (Sci::Position b
= 0; b
<widthCharBytes
; b
++)
1018 charBytes
[b
] = cb
.UCharAt(startUTF
+ b
);
1019 return CharacterExtracted(charBytes
, widthCharBytes
);
1021 // Else invalid UTF-8 so report the isolated byte as a replacement character one byte wide
1023 return CharacterExtracted(unicodeReplacementChar
, 1);
1025 // Moving backwards in DBCS is complex so use NextPosition
1026 const Sci::Position posStartCharacter
= NextPosition(position
, -1);
1027 return CharacterAfter(posStartCharacter
);
1031 // Return Sci::invalidPosition on out-of-bounds
// Move characterOffset characters away from positionStart and return the
// resulting position. Two strategies appear below: stepping one character at
// a time with NextPosition, and plain byte arithmetic
// (positionStart + characterOffset) followed by a range check.
1032 Sci_Position SCI_METHOD
Document::GetRelativePosition(Sci_Position positionStart
, Sci_Position characterOffset
) const {
1033 Sci::Position pos
= positionStart
;
// Direction of travel: +1 for a positive offset, -1 otherwise.
1035 const int increment
= (characterOffset
> 0) ? 1 : -1;
// Each iteration consumes one unit of characterOffset.
1036 while (characterOffset
!= 0) {
1037 const Sci::Position posNext
= NextPosition(pos
, increment
);
1039 return Sci::invalidPosition
;
1041 characterOffset
-= increment
;
// Byte arithmetic variant: the offset is applied directly to the position.
1044 pos
= positionStart
+ characterOffset
;
1045 if ((pos
< 0) || (pos
> Length()))
1046 return Sci::invalidPosition
;
// Move characterOffset UTF-16 code units away from positionStart and return
// the resulting position, or Sci::invalidPosition when that is out of bounds.
// When stepping with NextPosition, a step of more than 3 bytes (a 4 byte
// character) consumes two units of characterOffset instead of one.
1051 Sci::Position
Document::GetRelativePositionUTF16(Sci::Position positionStart
, Sci::Position characterOffset
) const noexcept
{
1052 Sci::Position pos
= positionStart
;
// Direction of travel: +1 for a positive offset, -1 otherwise.
1054 const int increment
= (characterOffset
> 0) ? 1 : -1;
1055 while (characterOffset
!= 0) {
1056 const Sci::Position posNext
= NextPosition(pos
, increment
);
1058 return Sci::invalidPosition
;
// Extra unit for a 4 byte character; the regular unit is consumed below.
1059 if (std::abs(pos
-posNext
) > 3) // 4 byte character = 2*UTF16.
1060 characterOffset
-= increment
;
1062 characterOffset
-= increment
;
// Byte arithmetic variant: the offset is applied directly to the position.
1065 pos
= positionStart
+ characterOffset
;
1066 if ((pos
< 0) || (pos
> LengthNoExcept()))
1067 return Sci::invalidPosition
;
// Read the character at position and store its width in bytes through pWidth.
// The result starts out as the single byte at position with width 1 and is
// refined for UTF-8 and DBCS code pages when the lead byte is not ASCII.
1072 int SCI_METHOD
Document::GetCharacterAndWidth(Sci_Position position
, Sci_Position
*pWidth
) const {
1073 int bytesInCharacter
= 1;
1074 const unsigned char leadByte
= cb
.UCharAt(position
);
1075 int character
= leadByte
;
1076 if (dbcsCodePage
&& !UTF8IsAscii(leadByte
)) {
1077 if (CpUtf8
== dbcsCodePage
) {
// UTF-8: gather the number of bytes the lead byte announces and classify them.
1078 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
1079 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
1080 for (int b
=1; b
<widthCharBytes
; b
++)
1081 charBytes
[b
] = cb
.UCharAt(position
+b
);
1082 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
1083 if (utf8status
& UTF8MaskInvalid
) {
1084 // Report as singleton surrogate values which are invalid Unicode
// The bad byte is reported as 0xDC80 + leadByte.
1085 character
= 0xDC80 + leadByte
;
// Valid sequence: width from the UTF8MaskWidth bits, value from UnicodeFromUTF8.
1087 bytesInCharacter
= utf8status
& UTF8MaskWidth
;
1088 character
= UnicodeFromUTF8(charBytes
);
1091 if (IsDBCSLeadByteNoExcept(leadByte
)) {
1092 const unsigned char trailByte
= cb
.UCharAt(position
+ 1);
1093 if (IsDBCSTrailByteNoExcept(trailByte
)) {
// DBCS characters are two bytes wide and reported as (lead << 8) | trail.
1094 bytesInCharacter
= 2;
1095 character
= (leadByte
<< 8) | trailByte
;
// Hand the width back to the caller.
1101 *pWidth
= bytesInCharacter
;
1106 int SCI_METHOD
Document::CodePage() const {
1107 return dbcsCodePage
;
1110 bool SCI_METHOD
Document::IsDBCSLeadByte(char ch
) const {
1111 // Used by lexers so must match IDocument method exactly
1112 return IsDBCSLeadByteNoExcept(ch
);
1115 bool Document::IsDBCSLeadByteNoExcept(char ch
) const noexcept
{
1116 // Used inside core Scintilla
1117 // Byte ranges found in Wikipedia articles with relevant search strings in each case
1118 const unsigned char uch
= ch
;
1119 switch (dbcsCodePage
) {
1122 return ((uch
>= 0x81) && (uch
<= 0x9F)) ||
1123 ((uch
>= 0xE0) && (uch
<= 0xFC));
1124 // Lead bytes F0 to FC may be a Microsoft addition.
1127 return (uch
>= 0x81) && (uch
<= 0xFE);
1129 // Korean Wansung KS C-5601-1987
1130 return (uch
>= 0x81) && (uch
<= 0xFE);
1133 return (uch
>= 0x81) && (uch
<= 0xFE);
1135 // Korean Johab KS C-5601-1992
1137 ((uch
>= 0x84) && (uch
<= 0xD3)) ||
1138 ((uch
>= 0xD8) && (uch
<= 0xDE)) ||
1139 ((uch
>= 0xE0) && (uch
<= 0xF9));
1144 bool Document::IsDBCSTrailByteNoExcept(char ch
) const noexcept
{
1145 const unsigned char trail
= ch
;
1146 switch (dbcsCodePage
) {
1149 return (trail
!= 0x7F) &&
1150 ((trail
>= 0x40) && (trail
<= 0xFC));
1153 return (trail
!= 0x7F) &&
1154 ((trail
>= 0x40) && (trail
<= 0xFE));
1156 // Korean Wansung KS C-5601-1987
1158 ((trail
>= 0x41) && (trail
<= 0x5A)) ||
1159 ((trail
>= 0x61) && (trail
<= 0x7A)) ||
1160 ((trail
>= 0x81) && (trail
<= 0xFE));
1164 ((trail
>= 0x40) && (trail
<= 0x7E)) ||
1165 ((trail
>= 0xA1) && (trail
<= 0xFE));
1167 // Korean Johab KS C-5601-1992
1169 ((trail
>= 0x31) && (trail
<= 0x7E)) ||
1170 ((trail
>= 0x81) && (trail
<= 0xFE));
1175 int Document::DBCSDrawBytes(std::string_view text
) const noexcept
{
1176 if (text
.length() <= 1) {
1177 return static_cast<int>(text
.length());
1179 if (IsDBCSLeadByteNoExcept(text
[0])) {
1180 return IsDBCSTrailByteNoExcept(text
[1]) ? 2 : 1;
1186 bool Document::IsDBCSDualByteAt(Sci::Position pos
) const noexcept
{
1187 return IsDBCSLeadByteNoExcept(cb
.CharAt(pos
))
1188 && IsDBCSTrailByteNoExcept(cb
.CharAt(pos
+ 1));
1191 // Need to break text into segments near end but taking into account the
1192 // encoding to not break inside a UTF-8 or DBCS character and also trying
1193 // to avoid breaking inside a pair of combining characters, or inside
1195 // TODO: implement grapheme cluster boundaries,
1196 // see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
1198 // The segment length must always be long enough (more than 4 bytes)
1199 // so that there will be at least one whole character to make a segment.
1200 // For UTF-8, text must consist only of valid whole characters.
1201 // In preference order from best to worst:
1202 // 1) Break before or after spaces or controls
1203 // 2) Break at word and punctuation boundary for better kerning and ligature support
1204 // 3) Break after whole character, this may break combining characters
1206 size_t Document::SafeSegment(std::string_view text
) const noexcept
{
1207 // check space first as most written language use spaces.
1208 for (std::string_view::iterator it
= text
.end() - 1; it
!= text
.begin(); --it
) {
1209 if (IsBreakSpace(*it
)) {
1210 return it
- text
.begin();
1214 if (!dbcsCodePage
|| dbcsCodePage
== CpUtf8
) {
1215 // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary.
1216 std::string_view::iterator it
= text
.end() - 1;
1217 const bool punctuation
= IsPunctuation(*it
);
1220 if (punctuation
!= IsPunctuation(*it
)) {
1221 return it
- text
.begin() + 1;
1223 } while (it
!= text
.begin());
1225 it
= text
.end() - 1;
1227 // for UTF-8 go back to the start of last character.
1228 for (int trail
= 0; trail
< UTF8MaxBytes
- 1 && UTF8IsTrailByte(*it
); trail
++) {
1232 return it
- text
.begin();
1236 // forward iterate for DBCS to find word and punctuation boundary.
1237 size_t lastPunctuationBreak
= 0;
1238 size_t lastEncodingAllowedBreak
= 0;
1239 CharacterClass ccPrev
= CharacterClass::space
;
1240 for (size_t j
= 0; j
< text
.length();) {
1241 const unsigned char ch
= text
[j
];
1242 lastEncodingAllowedBreak
= j
++;
1244 CharacterClass cc
= CharacterClass::word
;
1245 if (UTF8IsAscii(ch
)) {
1246 if (IsPunctuation(ch
)) {
1247 cc
= CharacterClass::punctuation
;
1250 j
+= IsDBCSLeadByteNoExcept(ch
);
1254 lastPunctuationBreak
= lastEncodingAllowedBreak
;
1257 return lastPunctuationBreak
? lastPunctuationBreak
: lastEncodingAllowedBreak
;
1261 EncodingFamily
Document::CodePageFamily() const noexcept
{
1262 if (CpUtf8
== dbcsCodePage
)
1263 return EncodingFamily::unicode
;
1264 else if (dbcsCodePage
)
1265 return EncodingFamily::dbcs
;
1267 return EncodingFamily::eightBit
;
1270 void Document::ModifiedAt(Sci::Position pos
) noexcept
{
1271 if (endStyled
> pos
)
1275 void Document::CheckReadOnly() {
1276 if (cb
.IsReadOnly() && enteredReadOnlyCount
== 0) {
1277 enteredReadOnlyCount
++;
1278 NotifyModifyAttempt();
1279 enteredReadOnlyCount
--;
1283 void Document::TrimReplacement(std::string_view
&text
, Range
&range
) const noexcept
{
1284 while (!text
.empty() && !range
.Empty() && (text
.front() == CharAt(range
.start
))) {
1285 text
.remove_prefix(1);
1288 while (!text
.empty() && !range
.Empty() && (text
.back() == CharAt(range
.end
-1))) {
1289 text
.remove_suffix(1);
1294 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1295 // SetStyleAt does not change the persistent state of a document
1297 bool Document::DeleteChars(Sci::Position pos
, Sci::Position len
) {
1302 if ((pos
+ len
) > LengthNoExcept())
1305 if (enteredModification
!= 0) {
1308 enteredModification
++;
1309 if (!cb
.IsReadOnly()) {
1312 ModificationFlags::BeforeDelete
| ModificationFlags::User
,
1315 const Sci::Line prevLinesTotal
= LinesTotal();
1316 const bool startSavePoint
= cb
.IsSavePoint();
1317 bool startSequence
= false;
1318 const char *text
= cb
.DeleteChars(pos
, len
, startSequence
);
1319 if (startSavePoint
&& cb
.IsCollectingUndo())
1320 NotifySavePoint(false);
1321 if ((pos
< LengthNoExcept()) || (pos
== 0))
1327 ModificationFlags::DeleteText
| ModificationFlags::User
|
1328 (startSequence
?ModificationFlags::StartAction
:ModificationFlags::None
),
1330 LinesTotal() - prevLinesTotal
, text
));
1332 enteredModification
--;
1334 return !cb
.IsReadOnly();
1338 * Insert a string with a length.
1340 Sci::Position
Document::InsertString(Sci::Position position
, const char *s
, Sci::Position insertLength
) {
1341 if (insertLength
<= 0) {
1344 CheckReadOnly(); // Application may change read only state here
1345 if (cb
.IsReadOnly()) {
1348 if (enteredModification
!= 0) {
1351 enteredModification
++;
1352 insertionSet
= false;
1356 ModificationFlags::InsertCheck
,
1357 position
, insertLength
,
1360 s
= insertion
.c_str();
1361 insertLength
= insertion
.length();
1365 ModificationFlags::BeforeInsert
| ModificationFlags::User
,
1366 position
, insertLength
,
1368 const Sci::Line prevLinesTotal
= LinesTotal();
1369 const bool startSavePoint
= cb
.IsSavePoint();
1370 bool startSequence
= false;
1371 const char *text
= cb
.InsertString(position
, s
, insertLength
, startSequence
);
1372 if (startSavePoint
&& cb
.IsCollectingUndo())
1373 NotifySavePoint(false);
1374 ModifiedAt(position
);
1377 ModificationFlags::InsertText
| ModificationFlags::User
|
1378 (startSequence
?ModificationFlags::StartAction
:ModificationFlags::None
),
1379 position
, insertLength
,
1380 LinesTotal() - prevLinesTotal
, text
));
1381 if (insertionSet
) { // Free memory as could be large
1382 std::string().swap(insertion
);
1384 enteredModification
--;
1385 return insertLength
;
1388 Sci::Position
Document::InsertString(Sci::Position position
, std::string_view sv
) {
1389 return InsertString(position
, sv
.data(), sv
.length());
1392 void Document::ChangeInsertion(const char *s
, Sci::Position length
) {
1393 insertionSet
= true;
1394 insertion
.assign(s
, length
);
1397 int SCI_METHOD
Document::AddData(const char *data
, Sci_Position length
) {
1399 const Sci::Position position
= Length();
1400 InsertString(position
, data
, length
);
1401 } catch (std::bad_alloc
&) {
1402 return static_cast<int>(Status::BadAlloc
);
1404 return static_cast<int>(Status::Failure
);
1406 return static_cast<int>(Status::Ok
);
1409 IDocumentEditable
*Document::AsDocumentEditable() noexcept
{
1410 return static_cast<IDocumentEditable
*>(this);
1413 void *SCI_METHOD
Document::ConvertToDocument() {
1414 return AsDocumentEditable();
1417 Sci::Position
Document::Undo() {
1418 Sci::Position newPos
= -1;
1420 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1421 enteredModification
++;
1422 if (!cb
.IsReadOnly()) {
1423 const bool startSavePoint
= cb
.IsSavePoint();
1424 bool multiLine
= false;
1425 const int steps
= cb
.StartUndo();
1426 //Platform::DebugPrintf("Steps=%d\n", steps);
1427 Range coalescedRemove
; // Default is empty at 0
1428 for (int step
= 0; step
< steps
; step
++) {
1429 const Sci::Line prevLinesTotal
= LinesTotal();
1430 const Action action
= cb
.GetUndoStep();
1431 if (action
.at
== ActionType::remove
) {
1432 NotifyModified(DocModification(
1433 ModificationFlags::BeforeInsert
| ModificationFlags::Undo
, action
));
1434 } else if (action
.at
== ActionType::container
) {
1435 DocModification
dm(ModificationFlags::Container
| ModificationFlags::Undo
);
1436 dm
.token
= action
.position
;
1439 NotifyModified(DocModification(
1440 ModificationFlags::BeforeDelete
| ModificationFlags::Undo
, action
));
1442 cb
.PerformUndoStep();
1443 if (action
.at
!= ActionType::container
) {
1444 ModifiedAt(action
.position
);
1445 newPos
= action
.position
;
1448 ModificationFlags modFlags
= ModificationFlags::Undo
;
1449 // With undo, an insertion action becomes a deletion notification
1450 if (action
.at
== ActionType::remove
) {
1451 newPos
+= action
.lenData
;
1452 modFlags
|= ModificationFlags::InsertText
;
1453 if (coalescedRemove
.Contains(action
.position
)) {
1454 coalescedRemove
.end
+= action
.lenData
;
1455 newPos
= coalescedRemove
.end
;
1457 coalescedRemove
= Range(action
.position
, action
.position
+ action
.lenData
);
1459 } else if (action
.at
== ActionType::insert
) {
1460 modFlags
|= ModificationFlags::DeleteText
;
1461 coalescedRemove
= Range();
1464 modFlags
|= ModificationFlags::MultiStepUndoRedo
;
1465 const Sci::Line linesAdded
= LinesTotal() - prevLinesTotal
;
1466 if (linesAdded
!= 0)
1468 if (step
== steps
- 1) {
1469 modFlags
|= ModificationFlags::LastStepInUndoRedo
;
1471 modFlags
|= ModificationFlags::MultilineUndoRedo
;
1473 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
1474 linesAdded
, action
.data
));
1477 const bool endSavePoint
= cb
.IsSavePoint();
1478 if (startSavePoint
!= endSavePoint
)
1479 NotifySavePoint(endSavePoint
);
1481 enteredModification
--;
1486 Sci::Position
Document::Redo() {
1487 Sci::Position newPos
= -1;
1489 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1490 enteredModification
++;
1491 if (!cb
.IsReadOnly()) {
1492 const bool startSavePoint
= cb
.IsSavePoint();
1493 bool multiLine
= false;
1494 const int steps
= cb
.StartRedo();
1495 for (int step
= 0; step
< steps
; step
++) {
1496 const Sci::Line prevLinesTotal
= LinesTotal();
1497 const Action action
= cb
.GetRedoStep();
1498 if (action
.at
== ActionType::insert
) {
1499 NotifyModified(DocModification(
1500 ModificationFlags::BeforeInsert
| ModificationFlags::Redo
, action
));
1501 } else if (action
.at
== ActionType::container
) {
1502 DocModification
dm(ModificationFlags::Container
| ModificationFlags::Redo
);
1503 dm
.token
= action
.position
;
1506 NotifyModified(DocModification(
1507 ModificationFlags::BeforeDelete
| ModificationFlags::Redo
, action
));
1509 cb
.PerformRedoStep();
1510 if (action
.at
!= ActionType::container
) {
1511 ModifiedAt(action
.position
);
1512 newPos
= action
.position
;
1515 ModificationFlags modFlags
= ModificationFlags::Redo
;
1516 if (action
.at
== ActionType::insert
) {
1517 newPos
+= action
.lenData
;
1518 modFlags
|= ModificationFlags::InsertText
;
1519 } else if (action
.at
== ActionType::remove
) {
1520 modFlags
|= ModificationFlags::DeleteText
;
1523 modFlags
|= ModificationFlags::MultiStepUndoRedo
;
1524 const Sci::Line linesAdded
= LinesTotal() - prevLinesTotal
;
1525 if (linesAdded
!= 0)
1527 if (step
== steps
- 1) {
1528 modFlags
|= ModificationFlags::LastStepInUndoRedo
;
1530 modFlags
|= ModificationFlags::MultilineUndoRedo
;
1533 DocModification(modFlags
, action
.position
, action
.lenData
,
1534 linesAdded
, action
.data
));
1537 const bool endSavePoint
= cb
.IsSavePoint();
1538 if (startSavePoint
!= endSavePoint
)
1539 NotifySavePoint(endSavePoint
);
1541 enteredModification
--;
1546 int Document::UndoSequenceDepth() const noexcept
{
1547 return cb
.UndoSequenceDepth();
1550 void Document::DelChar(Sci::Position pos
) {
1551 DeleteChars(pos
, LenChar(pos
));
1554 void Document::DelCharBack(Sci::Position pos
) {
1557 } else if (IsCrLf(pos
- 2)) {
1558 DeleteChars(pos
- 2, 2);
1559 } else if (dbcsCodePage
) {
1560 const Sci::Position startChar
= NextPosition(pos
, -1);
1561 DeleteChars(startChar
, pos
- startChar
);
1563 DeleteChars(pos
- 1, 1);
1567 static constexpr Sci::Position
NextTab(Sci::Position pos
, Sci::Position tabSize
) noexcept
{
1568 return ((pos
/ tabSize
) + 1) * tabSize
;
1571 static std::string
CreateIndentation(Sci::Position indent
, int tabSize
, bool insertSpaces
) {
1572 std::string indentation
;
1573 if (!insertSpaces
) {
1574 while (indent
>= tabSize
) {
1575 indentation
+= '\t';
1579 while (indent
> 0) {
1586 int SCI_METHOD
Document::GetLineIndentation(Sci_Position line
) {
1588 if ((line
>= 0) && (line
< LinesTotal())) {
1589 const Sci::Position lineStart
= LineStart(line
);
1590 const Sci::Position length
= Length();
1591 for (Sci::Position i
= lineStart
; i
< length
; i
++) {
1592 const char ch
= cb
.CharAt(i
);
1595 else if (ch
== '\t')
1596 indent
= static_cast<int>(NextTab(indent
, tabInChars
));
1604 Sci::Position
Document::SetLineIndentation(Sci::Line line
, Sci::Position indent
) {
1605 const int indentOfLine
= GetLineIndentation(line
);
1608 if (indent
!= indentOfLine
) {
1609 const std::string linebuf
= CreateIndentation(indent
, tabInChars
, !useTabs
);
1610 const Sci::Position thisLineStart
= LineStart(line
);
1611 const Sci::Position indentPos
= GetLineIndentPosition(line
);
1613 DeleteChars(thisLineStart
, indentPos
- thisLineStart
);
1614 return thisLineStart
+ InsertString(thisLineStart
, linebuf
);
1616 return GetLineIndentPosition(line
);
1620 Sci::Position
Document::GetLineIndentPosition(Sci::Line line
) const {
1623 Sci::Position pos
= LineStart(line
);
1624 const Sci::Position length
= Length();
1625 while ((pos
< length
) && IsSpaceOrTab(cb
.CharAt(pos
))) {
1631 Sci::Position
Document::GetColumn(Sci::Position pos
) const {
1632 Sci::Position column
= 0;
1633 const Sci::Line line
= SciLineFromPosition(pos
);
1634 if ((line
>= 0) && (line
< LinesTotal())) {
1635 for (Sci::Position i
= LineStart(line
); i
< pos
;) {
1636 const char ch
= cb
.CharAt(i
);
1638 column
= NextTab(column
, tabInChars
);
1640 } else if (ch
== '\r') {
1642 } else if (ch
== '\n') {
1644 } else if (i
>= Length()) {
1646 } else if (UTF8IsAscii(ch
)) {
1651 i
= NextPosition(i
, 1);
1658 Sci::Position
Document::CountCharacters(Sci::Position startPos
, Sci::Position endPos
) const noexcept
{
1659 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1660 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1661 Sci::Position count
= 0;
1662 Sci::Position i
= startPos
;
1663 while (i
< endPos
) {
1665 i
= NextPosition(i
, 1);
1670 Sci::Position
Document::CountUTF16(Sci::Position startPos
, Sci::Position endPos
) const noexcept
{
1671 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1672 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1673 Sci::Position count
= 0;
1674 Sci::Position i
= startPos
;
1675 while (i
< endPos
) {
1677 const Sci::Position next
= NextPosition(i
, 1);
1685 Sci::Position
Document::FindColumn(Sci::Line line
, Sci::Position column
) {
1686 Sci::Position position
= LineStart(line
);
1687 if ((line
>= 0) && (line
< LinesTotal())) {
1688 Sci::Position columnCurrent
= 0;
1689 while ((columnCurrent
< column
) && (position
< Length())) {
1690 const char ch
= cb
.CharAt(position
);
1692 columnCurrent
= NextTab(columnCurrent
, tabInChars
);
1693 if (columnCurrent
> column
)
1696 } else if (ch
== '\r') {
1698 } else if (ch
== '\n') {
1702 position
= NextPosition(position
, 1);
1709 void Document::Indent(bool forwards
, Sci::Line lineBottom
, Sci::Line lineTop
) {
1710 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1711 for (Sci::Line line
= lineBottom
; line
>= lineTop
; line
--) {
1712 const Sci::Position indentOfLine
= GetLineIndentation(line
);
1714 if (LineStart(line
) < LineEnd(line
)) {
1715 SetLineIndentation(line
, indentOfLine
+ IndentSize());
1718 SetLineIndentation(line
, indentOfLine
- IndentSize());
1725 constexpr std::string_view
EOLForMode(EndOfLine eolMode
) noexcept
{
1727 case EndOfLine::CrLf
:
1738 // Convert line endings for a piece of text to a particular mode.
1739 // Stop at len or when a NUL is found.
1740 std::string
Document::TransformLineEnds(const char *s
, size_t len
, EndOfLine eolModeWanted
) {
1742 const std::string_view eol
= EOLForMode(eolModeWanted
);
1743 for (size_t i
= 0; (i
< len
) && (s
[i
]); i
++) {
1744 if (s
[i
] == '\n' || s
[i
] == '\r') {
1746 if ((s
[i
] == '\r') && (i
+1 < len
) && (s
[i
+1] == '\n')) {
1750 dest
.push_back(s
[i
]);
1756 void Document::ConvertLineEnds(EndOfLine eolModeSet
) {
1759 for (Sci::Position pos
= 0; pos
< Length(); pos
++) {
1760 const char ch
= cb
.CharAt(pos
);
1762 if (cb
.CharAt(pos
+ 1) == '\n') {
1764 if (eolModeSet
== EndOfLine::Cr
) {
1765 DeleteChars(pos
+ 1, 1); // Delete the LF
1766 } else if (eolModeSet
== EndOfLine::Lf
) {
1767 DeleteChars(pos
, 1); // Delete the CR
1773 if (eolModeSet
== EndOfLine::CrLf
) {
1774 pos
+= InsertString(pos
+ 1, "\n", 1); // Insert LF
1775 } else if (eolModeSet
== EndOfLine::Lf
) {
1776 pos
+= InsertString(pos
, "\n", 1); // Insert LF
1777 DeleteChars(pos
, 1); // Delete CR
1781 } else if (ch
== '\n') {
1783 if (eolModeSet
== EndOfLine::CrLf
) {
1784 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1785 } else if (eolModeSet
== EndOfLine::Cr
) {
1786 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1787 DeleteChars(pos
, 1); // Delete LF
1795 std::string_view
Document::EOLString() const noexcept
{
1796 return EOLForMode(eolMode
);
1799 DocumentOption
Document::Options() const noexcept
{
1800 return (IsLarge() ? DocumentOption::TextLarge
: DocumentOption::Default
) |
1801 (cb
.HasStyles() ? DocumentOption::Default
: DocumentOption::StylesNone
);
1804 bool Document::IsWhiteLine(Sci::Line line
) const {
1805 Sci::Position currentChar
= LineStart(line
);
1806 const Sci::Position endLine
= LineEnd(line
);
1807 while (currentChar
< endLine
) {
1808 if (!IsSpaceOrTab(cb
.CharAt(currentChar
))) {
1816 Sci::Position
Document::ParaUp(Sci::Position pos
) const {
1817 Sci::Line line
= SciLineFromPosition(pos
);
1818 const Sci::Position start
= LineStart(line
);
1822 while (line
>= 0 && IsWhiteLine(line
)) { // skip empty lines
1825 while (line
>= 0 && !IsWhiteLine(line
)) { // skip non-empty lines
1829 return LineStart(line
);
1832 Sci::Position
Document::ParaDown(Sci::Position pos
) const {
1833 Sci::Line line
= SciLineFromPosition(pos
);
1834 while (line
< LinesTotal() && !IsWhiteLine(line
)) { // skip non-empty lines
1837 while (line
< LinesTotal() && IsWhiteLine(line
)) { // skip empty lines
1840 if (line
< LinesTotal())
1841 return LineStart(line
);
1842 else // end of a document
1843 return LineEnd(line
-1);
1846 CharacterClass
Document::WordCharacterClass(unsigned int ch
) const {
1847 if (dbcsCodePage
&& (ch
>= 0x80)) {
1848 if (CpUtf8
== dbcsCodePage
) {
1849 // Use hard coded Unicode class
1850 const CharacterCategory cc
= charMap
.CategoryFor(ch
);
1853 // Separator, Line/Paragraph
1856 return CharacterClass::newLine
;
1866 return CharacterClass::space
;
1878 // Mark - includes combining diacritics
1882 return CharacterClass::word
;
1897 return CharacterClass::punctuation
;
1902 return CharacterClass::word
;
1905 return charClass
.GetClass(static_cast<unsigned char>(ch
));
1909 * Used by commands that want to select whole words.
1910 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1912 Sci::Position
Document::ExtendWordSelect(Sci::Position pos
, int delta
, bool onlyWordCharacters
) const {
1913 CharacterClass ccStart
= CharacterClass::word
;
1915 if (!onlyWordCharacters
) {
1916 const CharacterExtracted ce
= CharacterBefore(pos
);
1917 ccStart
= WordCharacterClass(ce
.character
);
1920 const CharacterExtracted ce
= CharacterBefore(pos
);
1921 if (WordCharacterClass(ce
.character
) != ccStart
)
1923 pos
-= ce
.widthBytes
;
1926 if (!onlyWordCharacters
&& pos
< LengthNoExcept()) {
1927 const CharacterExtracted ce
= CharacterAfter(pos
);
1928 ccStart
= WordCharacterClass(ce
.character
);
1930 while (pos
< LengthNoExcept()) {
1931 const CharacterExtracted ce
= CharacterAfter(pos
);
1932 if (WordCharacterClass(ce
.character
) != ccStart
)
1934 pos
+= ce
.widthBytes
;
1937 return MovePositionOutsideChar(pos
, delta
, true);
1941 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1943 * This is looking for a transition between character classes although there is also some
1944 * additional movement to transit white space.
1945 * Used by cursor movement by word commands.
1947 Sci::Position
Document::NextWordStart(Sci::Position pos
, int delta
) const {
1950 const CharacterExtracted ce
= CharacterBefore(pos
);
1951 if (WordCharacterClass(ce
.character
) != CharacterClass::space
)
1953 pos
-= ce
.widthBytes
;
1956 CharacterExtracted ce
= CharacterBefore(pos
);
1957 const CharacterClass ccStart
= WordCharacterClass(ce
.character
);
1959 ce
= CharacterBefore(pos
);
1960 if (WordCharacterClass(ce
.character
) != ccStart
)
1962 pos
-= ce
.widthBytes
;
1966 CharacterExtracted ce
= CharacterAfter(pos
);
1967 const CharacterClass ccStart
= WordCharacterClass(ce
.character
);
1968 while (pos
< LengthNoExcept()) {
1969 ce
= CharacterAfter(pos
);
1970 if (WordCharacterClass(ce
.character
) != ccStart
)
1972 pos
+= ce
.widthBytes
;
1974 while (pos
< LengthNoExcept()) {
1975 ce
= CharacterAfter(pos
);
1976 if (WordCharacterClass(ce
.character
) != CharacterClass::space
)
1978 pos
+= ce
.widthBytes
;
1985 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1987 * This is looking for a transition between character classes although there is also some
1988 * additional movement to transit white space.
1989 * Used by cursor movement by word commands.
1991 Sci::Position
Document::NextWordEnd(Sci::Position pos
, int delta
) const {
1994 CharacterExtracted ce
= CharacterBefore(pos
);
1995 const CharacterClass ccStart
= WordCharacterClass(ce
.character
);
1996 if (ccStart
!= CharacterClass::space
) {
1998 ce
= CharacterBefore(pos
);
1999 if (WordCharacterClass(ce
.character
) != ccStart
)
2001 pos
-= ce
.widthBytes
;
2005 ce
= CharacterBefore(pos
);
2006 if (WordCharacterClass(ce
.character
) != CharacterClass::space
)
2008 pos
-= ce
.widthBytes
;
2012 while (pos
< LengthNoExcept()) {
2013 const CharacterExtracted ce
= CharacterAfter(pos
);
2014 if (WordCharacterClass(ce
.character
) != CharacterClass::space
)
2016 pos
+= ce
.widthBytes
;
2018 if (pos
< LengthNoExcept()) {
2019 CharacterExtracted ce
= CharacterAfter(pos
);
2020 const CharacterClass ccStart
= WordCharacterClass(ce
.character
);
2021 while (pos
< LengthNoExcept()) {
2022 ce
= CharacterAfter(pos
);
2023 if (WordCharacterClass(ce
.character
) != ccStart
)
2025 pos
+= ce
.widthBytes
;
2034 constexpr bool IsWordEdge(CharacterClass cc
, CharacterClass ccNext
) noexcept
{
2035 return (cc
!= ccNext
) &&
2036 (cc
== CharacterClass::word
|| cc
== CharacterClass::punctuation
);
2042 * Check that the character at the given position is a word or punctuation character and that
2043 * the previous character is of a different character class.
2045 bool Document::IsWordStartAt(Sci::Position pos
) const {
2046 if (pos
>= LengthNoExcept())
2049 const CharacterExtracted cePos
= CharacterAfter(pos
);
2050 // At start of document, treat as if space before so can be word start
2051 const CharacterExtracted cePrev
= (pos
> 0) ?
2052 CharacterBefore(pos
) : CharacterExtracted(' ', 1);
2053 return IsWordEdge(WordCharacterClass(cePos
.character
), WordCharacterClass(cePrev
.character
));
2059 * Check that the character before the given position is a word or punctuation character and that
2060 * the next character is of a different character class.
2062 bool Document::IsWordEndAt(Sci::Position pos
) const {
2065 if (pos
<= LengthNoExcept()) {
2066 // At end of document, treat as if space after so can be word end
2067 const CharacterExtracted cePos
= (pos
< LengthNoExcept()) ?
2068 CharacterAfter(pos
) : CharacterExtracted(' ', 1);
2069 const CharacterExtracted cePrev
= CharacterBefore(pos
);
2070 return IsWordEdge(WordCharacterClass(cePrev
.character
), WordCharacterClass(cePos
.character
));
2076 * Check that the given range is has transitions between character classes at both
2077 * ends and where the characters on the inside are word or punctuation characters.
2079 bool Document::IsWordAt(Sci::Position start
, Sci::Position end
) const {
2080 return (start
< end
) && IsWordStartAt(start
) && IsWordEndAt(end
);
2083 bool Document::MatchesWordOptions(bool word
, bool wordStart
, Sci::Position pos
, Sci::Position length
) const {
2084 return (!word
&& !wordStart
) ||
2085 (word
&& IsWordAt(pos
, pos
+ length
)) ||
2086 (wordStart
&& IsWordStartAt(pos
));
2089 bool Document::HasCaseFolder() const noexcept
{
2090 return pcf
!= nullptr;
2093 void Document::SetCaseFolder(std::unique_ptr
<CaseFolder
> pcf_
) noexcept
{
2094 pcf
= std::move(pcf_
);
2097 CharacterExtracted
Document::ExtractCharacter(Sci::Position position
) const noexcept
{
2098 const unsigned char leadByte
= cb
.UCharAt(position
);
2099 if (UTF8IsAscii(leadByte
)) {
2100 // Common case: ASCII character
2101 return CharacterExtracted(leadByte
, 1);
2103 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
2104 unsigned char charBytes
[UTF8MaxBytes
] = { leadByte
, 0, 0, 0 };
2105 for (int b
=1; b
<widthCharBytes
; b
++)
2106 charBytes
[b
] = cb
.UCharAt(position
+ b
);
2107 return CharacterExtracted(charBytes
, widthCharBytes
);
2112 // Equivalent of memchr over the split view
2113 ptrdiff_t SplitFindChar(const SplitView
&view
, size_t start
, size_t length
, int ch
) noexcept
{
2114 size_t range1Length
= 0;
2115 if (start
< view
.length1
) {
2116 range1Length
= std::min(length
, view
.length1
- start
);
2117 const char *match
= static_cast<const char *>(memchr(view
.segment1
+ start
, ch
, range1Length
));
2119 return match
- view
.segment1
;
2121 start
+= range1Length
;
2123 const char *match2
= static_cast<const char *>(memchr(view
.segment2
+ start
, ch
, length
- range1Length
));
2125 return match2
- view
.segment2
;
2130 // Equivalent of memcmp over the split view
2131 // This does not call memcmp as search texts are commonly too short to overcome the
2133 bool SplitMatch(const SplitView
&view
, size_t start
, std::string_view text
) noexcept
{
2134 for (size_t i
= 0; i
< text
.length(); i
++) {
2135 if (view
.CharAt(i
+ start
) != text
[i
]) {
2145 * Find text in document, supporting both forward and backward
2146 * searches (just pass minPos > maxPos to do a backward search)
2147 * Has not been tested with backwards DBCS searches yet.
2149 Sci::Position
Document::FindText(Sci::Position minPos
, Sci::Position maxPos
, const char *search
,
2150 FindOption flags
, Sci::Position
*length
) {
2153 const bool caseSensitive
= FlagSet(flags
, FindOption::MatchCase
);
2154 const bool word
= FlagSet(flags
, FindOption::WholeWord
);
2155 const bool wordStart
= FlagSet(flags
, FindOption::WordStart
);
2156 const bool regExp
= FlagSet(flags
, FindOption::RegExp
);
2159 regex
= std::unique_ptr
<RegexSearchBase
>(CreateRegexSearch(&charClass
));
2160 return regex
->FindText(this, minPos
, maxPos
, search
, caseSensitive
, word
, wordStart
, flags
, length
);
2163 const bool forward
= minPos
<= maxPos
;
2164 const int increment
= forward
? 1 : -1;
2166 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2167 const Sci::Position startPos
= MovePositionOutsideChar(minPos
, increment
, false);
2168 const Sci::Position endPos
= MovePositionOutsideChar(maxPos
, increment
, false);
2170 // Compute actual search ranges needed
2171 const Sci::Position lengthFind
= *length
;
2173 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2174 const Sci::Position limitPos
= std::max(startPos
, endPos
);
2175 Sci::Position pos
= startPos
;
2177 // Back all of a character
2178 pos
= NextPosition(pos
, increment
);
2180 const SplitView cbView
= cb
.AllView();
2181 if (caseSensitive
) {
2182 const Sci::Position endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
2183 const unsigned char charStartSearch
= search
[0];
2184 if (forward
&& ((0 == dbcsCodePage
) || (CpUtf8
== dbcsCodePage
&& !UTF8IsTrailByte(charStartSearch
)))) {
2185 // This is a fast case where there is no need to test byte values to iterate
2186 // so becomes the equivalent of a memchr+memcmp loop.
2187 // UTF-8 search will not be self-synchronizing when starts with trail byte
2188 const std::string_view
suffix(search
+ 1, lengthFind
- 1);
2189 while (pos
< endSearch
) {
2190 pos
= SplitFindChar(cbView
, pos
, limitPos
- pos
, charStartSearch
);
2194 if (SplitMatch(cbView
, pos
+ 1, suffix
) && MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
2200 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
2201 const unsigned char leadByte
= cbView
.CharAt(pos
);
2202 if (leadByte
== charStartSearch
) {
2203 bool found
= (pos
+ lengthFind
) <= limitPos
;
2204 // SplitMatch could be called here but it is slower with g++ -O2
2205 for (int indexSearch
= 1; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
2206 found
= cbView
.CharAt(pos
+ indexSearch
) == search
[indexSearch
];
2208 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
2212 if (forward
&& UTF8IsAscii(leadByte
)) {
2216 if (!NextCharacter(pos
, increment
)) {
2225 } else if (CpUtf8
== dbcsCodePage
) {
2226 constexpr size_t maxFoldingExpansion
= 4;
2227 std::vector
<char> searchThing((lengthFind
+1) * UTF8MaxBytes
* maxFoldingExpansion
+ 1);
2228 const size_t lenSearch
=
2229 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
2230 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
2231 int widthFirstCharacter
= 1;
2232 Sci::Position posIndexDocument
= pos
;
2233 size_t indexSearch
= 0;
2234 bool characterMatches
= true;
2235 while (indexSearch
< lenSearch
) {
2236 const unsigned char leadByte
= cbView
.CharAt(posIndexDocument
);
2239 if (UTF8IsAscii(leadByte
)) {
2240 if ((posIndexDocument
+ 1) > limitPos
) {
2243 characterMatches
= searchThing
[indexSearch
] == MakeLowerCase(leadByte
);
2245 char bytes
[UTF8MaxBytes
]{ static_cast<char>(leadByte
) };
2246 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
2247 for (int b
= 1; b
< widthCharBytes
; b
++) {
2248 bytes
[b
] = cbView
.CharAt(posIndexDocument
+ b
);
2250 widthChar
= UTF8Classify(bytes
, widthCharBytes
) & UTF8MaskWidth
;
2251 if (!indexSearch
) { // First character
2252 widthFirstCharacter
= widthChar
;
2254 if ((posIndexDocument
+ widthChar
) > limitPos
) {
2257 char folded
[UTF8MaxBytes
* maxFoldingExpansion
+ 1];
2258 lenFlat
= pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
);
2259 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2260 assert((indexSearch
+ lenFlat
) <= searchThing
.size());
2261 // Does folded match the buffer
2262 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
2264 if (!characterMatches
) {
2267 posIndexDocument
+= widthChar
;
2268 indexSearch
+= lenFlat
;
2270 if (characterMatches
&& (indexSearch
== lenSearch
)) {
2271 if (MatchesWordOptions(word
, wordStart
, pos
, posIndexDocument
- pos
)) {
2272 *length
= posIndexDocument
- pos
;
2277 pos
+= widthFirstCharacter
;
2279 if (!NextCharacter(pos
, increment
)) {
2284 } else if (dbcsCodePage
) {
2285 constexpr size_t maxBytesCharacter
= 2;
2286 constexpr size_t maxFoldingExpansion
= 4;
2287 std::vector
<char> searchThing((lengthFind
+1) * maxBytesCharacter
* maxFoldingExpansion
+ 1);
2288 const size_t lenSearch
= pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
2289 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
2290 int widthFirstCharacter
= 0;
2291 Sci::Position indexDocument
= 0;
2292 size_t indexSearch
= 0;
2293 bool characterMatches
= true;
2294 while (((pos
+ indexDocument
) < limitPos
) &&
2295 (indexSearch
< lenSearch
)) {
2296 const unsigned char leadByte
= cbView
.CharAt(pos
+ indexDocument
);
2297 const int widthChar
= (!UTF8IsAscii(leadByte
) && IsDBCSLeadByteNoExcept(leadByte
)) ? 2 : 1;
2298 if (!widthFirstCharacter
) {
2299 widthFirstCharacter
= widthChar
;
2301 if ((pos
+ indexDocument
+ widthChar
) > limitPos
) {
2305 if (widthChar
== 1) {
2306 characterMatches
= searchThing
[indexSearch
] == MakeLowerCase(leadByte
);
2308 const char bytes
[maxBytesCharacter
+ 1] {
2309 static_cast<char>(leadByte
),
2310 cbView
.CharAt(pos
+ indexDocument
+ 1)
2312 char folded
[maxBytesCharacter
* maxFoldingExpansion
+ 1];
2313 lenFlat
= pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
);
2314 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2315 assert((indexSearch
+ lenFlat
) <= searchThing
.size());
2316 // Does folded match the buffer
2317 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
2319 if (!characterMatches
) {
2322 indexDocument
+= widthChar
;
2323 indexSearch
+= lenFlat
;
2325 if (characterMatches
&& (indexSearch
== lenSearch
)) {
2326 if (MatchesWordOptions(word
, wordStart
, pos
, indexDocument
)) {
2327 *length
= indexDocument
;
2332 pos
+= widthFirstCharacter
;
2334 if (!NextCharacter(pos
, increment
)) {
2340 const Sci::Position endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
2341 std::vector
<char> searchThing(lengthFind
+ 1);
2342 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
2343 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
2344 bool found
= (pos
+ lengthFind
) <= limitPos
;
2345 for (int indexSearch
= 0; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
2346 const char ch
= cbView
.CharAt(pos
+ indexSearch
);
2347 const char chTest
= searchThing
[indexSearch
];
2348 if (UTF8IsAscii(ch
)) {
2349 found
= chTest
== MakeLowerCase(ch
);
2352 pcf
->Fold(folded
, sizeof(folded
), &ch
, 1);
2353 found
= folded
[0] == chTest
;
2356 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
2363 //Platform::DebugPrintf("Not found\n");
// Hand the replacement pattern `text` and the `length` pointer to the `regex`
// object, together with this document, and return what it returns.
// NOTE(review): the lines surrounding this call (listing lines 2368, 2370-2371)
// are not visible here — confirm what happens when `regex` is unset.
2367 const char *Document::SubstituteByPosition(const char *text
, Sci::Position
*length
) {
2369 return regex
->SubstituteByPosition(this, text
, length
);
2374 LineCharacterIndexType
Document::LineCharacterIndex() const noexcept
{
2375 return cb
.LineCharacterIndex();
2378 void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex
) {
2379 return cb
.AllocateLineCharacterIndex(lineCharacterIndex
);
2382 void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex
) {
2383 return cb
.ReleaseLineCharacterIndex(lineCharacterIndex
);
// NOTE(review): the body (listing line 2387) is not visible here. Callers in this
// file use the result as the exclusive upper bound when validating or iterating
// line numbers — confirm against the full source.
2386 Sci::Line
Document::LinesTotal() const noexcept
{
2390 void Document::AllocateLines(Sci::Line lines
) {
2391 cb
.AllocateLines(lines
);
2394 void Document::SetDefaultCharClasses(bool includeWordClass
) {
2395 charClass
.SetDefaultCharClasses(includeWordClass
);
2398 void Document::SetCharClasses(const unsigned char *chars
, CharacterClass newCharClass
) {
2399 charClass
.SetCharClasses(chars
, newCharClass
);
2402 int Document::GetCharsOfClass(CharacterClass characterClass
, unsigned char *buffer
) const {
2403 return charClass
.GetCharsOfClass(characterClass
, buffer
);
2406 void Document::SetCharacterCategoryOptimization(int countCharacters
) {
2407 charMap
.Optimize(countCharacters
);
2410 int Document::CharacterCategoryOptimization() const noexcept
{
2411 return charMap
.Size();
2414 void SCI_METHOD
Document::StartStyling(Sci_Position position
) {
2415 endStyled
= position
;
// Apply one `style` value to `length` bytes starting at endStyled, then advance
// endStyled by `length`. When cb.SetStyleFor reports a change, a
// ChangeStyle|User DocModification is built for the range that began at the
// previous endStyled. The first test checks whether enteredStyling is non-zero.
// NOTE(review): the return statements, the enteredStyling adjustments and the
// use of `mh` (listing lines 2420-2422, 2427-2428, 2430-2433) are not visible here.
2418 bool SCI_METHOD
Document::SetStyleFor(Sci_Position length
, char style
) {
2419 if (enteredStyling
!= 0) {
2423 const Sci::Position prevEndStyled
= endStyled
;
2424 if (cb
.SetStyleFor(endStyled
, length
, style
)) {
2425 const DocModification
mh(ModificationFlags::ChangeStyle
| ModificationFlags::User
,
2426 prevEndStyled
, length
);
2429 endStyled
+= length
;
// Apply `length` individual style bytes from `styles`, one per document byte
// starting at endStyled; endStyled advances with each iteration and is asserted
// to stay below Length(). startMod and endMod bracket the changed range, which
// is reported as a ChangeStyle|User DocModification of endMod - startMod + 1 bytes.
// NOTE(review): the return statements and the lines updating didChange / endMod
// are not visible in this listing. The loop index is `int` while `length` is
// Sci_Position — check whether very large lengths matter here.
2435 bool SCI_METHOD
Document::SetStyles(Sci_Position length
, const char *styles
) {
2436 if (enteredStyling
!= 0) {
2440 bool didChange
= false;
2441 Sci::Position startMod
= 0;
2442 Sci::Position endMod
= 0;
2443 for (int iPos
= 0; iPos
< length
; iPos
++, endStyled
++) {
2444 PLATFORM_ASSERT(endStyled
< Length());
2445 if (cb
.SetStyleAt(endStyled
, styles
[iPos
])) {
2447 startMod
= endStyled
;
2454 const DocModification
mh(ModificationFlags::ChangeStyle
| ModificationFlags::User
,
2455 startMod
, endMod
- startMod
+ 1);
// Request styling up to `pos`. Nothing happens unless enteredStyling is zero and
// `pos` lies beyond GetEndStyled(). The style clock is incremented first. When a
// lexer interface exists and does not use container lexing, it colourises from
// LineStartPosition(GetEndStyled()) to `pos`. The watcher loop calls
// NotifyStyleNeeded on each watcher and stops once GetEndStyled() reaches `pos`.
// NOTE(review): the line joining the two branches (listing line 2469) is not
// visible here — presumably an `else`; confirm.
2463 void Document::EnsureStyledTo(Sci::Position pos
) {
2464 if ((enteredStyling
== 0) && (pos
> GetEndStyled())) {
2465 IncrementStyleClock();
2466 if (pli
&& !pli
->UseContainerLexing()) {
2467 const Sci::Position endStyledTo
= LineStartPosition(GetEndStyled());
2468 pli
->Colourise(endStyledTo
, pos
);
2470 // Ask the watchers to style, and stop as soon as one responds.
2471 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin();
2472 (pos
> GetEndStyled()) && (it
!= watchers
.end()); ++it
) {
2473 it
->watcher
->NotifyStyleNeeded(this, it
->userData
, pos
);
2479 void Document::StyleToAdjustingLineDuration(Sci::Position pos
) {
2480 const Sci::Position stylingStart
= GetEndStyled();
2481 ElapsedPeriod epStyling
;
2482 EnsureStyledTo(pos
);
2483 durationStyleOneByte
.AddSample(pos
- stylingStart
, epStyling
.Duration());
// NOTE(review): the body (listing line 2487) is not visible here. Presumably
// returns the interface held in `pli`, which SetLexInterface assigns — confirm.
2486 LexInterface
*Document::GetLexInterface() const noexcept
{
2490 void Document::SetLexInterface(std::unique_ptr
<LexInterface
> pLexInterface
) noexcept
{
2491 pli
= std::move(pLexInterface
);
// Store `state` for `line` through States() and return the value that was there
// before. When the new state differs from the previous one, a ChangeLineState
// DocModification positioned at LineStart(line) is built for that line.
// NOTE(review): the line that uses `mh` (listing line 2499) is not visible here.
2494 int SCI_METHOD
Document::SetLineState(Sci_Position line
, int state
) {
2495 const int statePrevious
= States()->SetLineState(line
, state
, LinesTotal());
2496 if (state
!= statePrevious
) {
2497 const DocModification
mh(ModificationFlags::ChangeLineState
, LineStart(line
), 0, 0, nullptr,
2498 static_cast<Sci::Line
>(line
));
2501 return statePrevious
;
2504 int SCI_METHOD
Document::GetLineState(Sci_Position line
) const {
2505 return States()->GetLineState(line
);
2508 Sci::Line
Document::GetMaxLineState() const noexcept
{
2509 return States()->GetMaxLineState();
// Build a LexerState DocModification covering the range from `start` with
// length end - start.
// NOTE(review): the line that uses `mh` (listing line 2515) is not visible here.
2512 void SCI_METHOD
Document::ChangeLexerState(Sci_Position start
, Sci_Position end
) {
2513 const DocModification
mh(ModificationFlags::LexerState
, start
,
2514 end
-start
, 0, nullptr, 0);
2518 StyledText
Document::MarginStyledText(Sci::Line line
) const noexcept
{
2519 const LineAnnotation
*pla
= Margins();
2520 return StyledText(pla
->Length(line
), pla
->Text(line
),
2521 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
// Store `text` as the margin text of `line` and build a ChangeMargin
// DocModification positioned at LineStart(line) for that line.
// NOTE(review): the line that uses `mh` (listing line 2528) is not visible here.
2524 void Document::MarginSetText(Sci::Line line
, const char *text
) {
2525 Margins()->SetText(line
, text
);
2526 const DocModification
mh(ModificationFlags::ChangeMargin
, LineStart(line
),
2527 0, 0, nullptr, line
);
2531 void Document::MarginSetStyle(Sci::Line line
, int style
) {
2532 Margins()->SetStyle(line
, style
);
2533 NotifyModified(DocModification(ModificationFlags::ChangeMargin
, LineStart(line
),
2534 0, 0, nullptr, line
));
2537 void Document::MarginSetStyles(Sci::Line line
, const unsigned char *styles
) {
2538 Margins()->SetStyles(line
, styles
);
2539 NotifyModified(DocModification(ModificationFlags::ChangeMargin
, LineStart(line
),
2540 0, 0, nullptr, line
));
2543 void Document::MarginClearAll() {
2544 const Sci::Line maxEditorLine
= LinesTotal();
2545 for (Sci::Line l
=0; l
<maxEditorLine
; l
++)
2546 MarginSetText(l
, nullptr);
2547 // Free remaining data
2548 Margins()->ClearAll();
2551 StyledText
Document::AnnotationStyledText(Sci::Line line
) const noexcept
{
2552 const LineAnnotation
*pla
= Annotations();
2553 return StyledText(pla
->Length(line
), pla
->Text(line
),
2554 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
// Set the annotation text of `line`; lines outside [0, LinesTotal()) are ignored.
// The annotation line count is sampled before and after the change, and the
// difference is stored in annotationLinesAdded of a ChangeAnnotation
// DocModification positioned at LineStart(line).
// NOTE(review): the line that uses `mh` (listing line 2565) is not visible here.
// linesBefore is Sci::Line while linesAfter is int — the two could share a type.
2557 void Document::AnnotationSetText(Sci::Line line
, const char *text
) {
2558 if (line
>= 0 && line
< LinesTotal()) {
2559 const Sci::Line linesBefore
= AnnotationLines(line
);
2560 Annotations()->SetText(line
, text
);
2561 const int linesAfter
= AnnotationLines(line
);
2562 DocModification
mh(ModificationFlags::ChangeAnnotation
, LineStart(line
),
2563 0, 0, nullptr, line
);
2564 mh
.annotationLinesAdded
= linesAfter
- linesBefore
;
// Set a single style for the annotation of `line`; lines outside
// [0, LinesTotal()) are ignored. A ChangeAnnotation DocModification positioned
// at LineStart(line) is built for the line.
// NOTE(review): the line that uses `mh` (listing line 2574) is not visible here.
2569 void Document::AnnotationSetStyle(Sci::Line line
, int style
) {
2570 if (line
>= 0 && line
< LinesTotal()) {
2571 Annotations()->SetStyle(line
, style
);
2572 const DocModification
mh(ModificationFlags::ChangeAnnotation
, LineStart(line
),
2573 0, 0, nullptr, line
);
2578 void Document::AnnotationSetStyles(Sci::Line line
, const unsigned char *styles
) {
2579 if (line
>= 0 && line
< LinesTotal()) {
2580 Annotations()->SetStyles(line
, styles
);
2584 int Document::AnnotationLines(Sci::Line line
) const noexcept
{
2585 return Annotations()->Lines(line
);
// Clear the annotation of every line through AnnotationSetText, then release
// whatever Annotations() still holds. Annotations()->Empty() is tested first.
// NOTE(review): the statement guarded by the Empty() test (listing line 2590)
// is not visible here — presumably an early return; confirm.
2588 void Document::AnnotationClearAll() {
2589 if (Annotations()->Empty()) {
2592 const Sci::Line maxEditorLine
= LinesTotal();
2593 for (Sci::Line l
=0; l
<maxEditorLine
; l
++)
2594 AnnotationSetText(l
, nullptr);
2595 // Free remaining data
2596 Annotations()->ClearAll();
2599 StyledText
Document::EOLAnnotationStyledText(Sci::Line line
) const noexcept
{
2600 const LineAnnotation
*pla
= EOLAnnotations();
2601 return StyledText(pla
->Length(line
), pla
->Text(line
),
2602 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
// Set the end-of-line annotation text of `line`; lines outside
// [0, LinesTotal()) are ignored. A ChangeEOLAnnotation DocModification
// positioned at LineStart(line) is built for the line.
// NOTE(review): the line that uses `mh` (listing line 2610) is not visible here.
2605 void Document::EOLAnnotationSetText(Sci::Line line
, const char *text
) {
2606 if (line
>= 0 && line
< LinesTotal()) {
2607 EOLAnnotations()->SetText(line
, text
);
2608 const DocModification
mh(ModificationFlags::ChangeEOLAnnotation
, LineStart(line
),
2609 0, 0, nullptr, line
);
// Set a single style for the end-of-line annotation of `line`; lines outside
// [0, LinesTotal()) are ignored. A ChangeEOLAnnotation DocModification
// positioned at LineStart(line) is built for the line.
// NOTE(review): the line that uses `mh` (listing line 2619) is not visible here.
2614 void Document::EOLAnnotationSetStyle(Sci::Line line
, int style
) {
2615 if (line
>= 0 && line
< LinesTotal()) {
2616 EOLAnnotations()->SetStyle(line
, style
);
2617 const DocModification
mh(ModificationFlags::ChangeEOLAnnotation
, LineStart(line
),
2618 0, 0, nullptr, line
);
// Clear the end-of-line annotation of every line through EOLAnnotationSetText,
// then release whatever EOLAnnotations() still holds. EOLAnnotations()->Empty()
// is tested first.
// NOTE(review): the statement guarded by the Empty() test (listing line 2625)
// is not visible here — presumably an early return; confirm.
2623 void Document::EOLAnnotationClearAll() {
2624 if (EOLAnnotations()->Empty()) {
2627 const Sci::Line maxEditorLine
= LinesTotal();
2628 for (Sci::Line l
=0; l
<maxEditorLine
; l
++)
2629 EOLAnnotationSetText(l
, nullptr);
2630 // Free remaining data
2631 EOLAnnotations()->ClearAll();
2634 void Document::IncrementStyleClock() noexcept
{
2635 styleClock
= (styleClock
+ 1) % 0x100000;
2638 void SCI_METHOD
Document::DecorationSetCurrentIndicator(int indicator
) {
2639 decorations
->SetCurrentIndicator(indicator
);
// Fill `fillLength` bytes from `position` with `value` in `decorations`, then
// build a ChangeIndicator|User DocModification from the position and length
// that FillRange reports back.
// NOTE(review): the condition guarding the notification and the line that uses
// `mh` (listing lines 2645, 2648) are not visible here.
2642 void SCI_METHOD
Document::DecorationFillRange(Sci_Position position
, int value
, Sci_Position fillLength
) {
2643 const FillResult
<Sci::Position
> fr
= decorations
->FillRange(
2644 position
, value
, fillLength
);
2646 const DocModification
mh(ModificationFlags::ChangeIndicator
| ModificationFlags::User
,
2647 fr
.position
, fr
.fillLength
);
// Register a (watcher, userData) pair. The pair is first looked up in `watchers`
// with std::find; the push_back appends it.
// NOTE(review): the return statements (listing lines 2657, 2659) are not
// visible here — presumably false for a duplicate and true after appending; confirm.
2652 bool Document::AddWatcher(DocWatcher
*watcher
, void *userData
) {
2653 const WatcherWithUserData
wwud(watcher
, userData
);
2654 std::vector
<WatcherWithUserData
>::iterator it
=
2655 std::find(watchers
.begin(), watchers
.end(), wwud
);
2656 if (it
!= watchers
.end())
2658 watchers
.push_back(wwud
);
// Look up a (watcher, userData) pair in `watchers` with std::find and act on it
// when found. The function is noexcept; the existing comments explain that the
// lookup cannot fail in practice but std::find is not itself noexcept.
// NOTE(review): the try/catch framing, the removal statement and the return
// statements are not visible in this listing.
2662 bool Document::RemoveWatcher(DocWatcher
*watcher
, void *userData
) noexcept
{
2664 // This can never fail as WatcherWithUserData constructor and == are noexcept
2665 // but std::find is not noexcept.
2666 std::vector
<WatcherWithUserData
>::iterator it
=
2667 std::find(watchers
.begin(), watchers
.end(), WatcherWithUserData(watcher
, userData
));
2668 if (it
!= watchers
.end()) {
2673 // Ignore any exception
2678 void Document::NotifyModifyAttempt() {
2679 for (const WatcherWithUserData
&watcher
: watchers
) {
2680 watcher
.watcher
->NotifyModifyAttempt(this, watcher
.userData
);
2684 void Document::NotifySavePoint(bool atSavePoint
) {
2685 for (const WatcherWithUserData
&watcher
: watchers
) {
2686 watcher
.watcher
->NotifySavePoint(this, watcher
.userData
, atSavePoint
);
2690 void Document::NotifyModified(DocModification mh
) {
2691 if (FlagSet(mh
.modificationType
, ModificationFlags::InsertText
)) {
2692 decorations
->InsertSpace(mh
.position
, mh
.length
);
2693 } else if (FlagSet(mh
.modificationType
, ModificationFlags::DeleteText
)) {
2694 decorations
->DeleteRange(mh
.position
, mh
.length
);
2696 for (const WatcherWithUserData
&watcher
: watchers
) {
2697 watcher
.watcher
->NotifyModified(this, mh
, watcher
.userData
);
2701 bool Document::IsWordPartSeparator(unsigned int ch
) const {
2702 return (WordCharacterClass(ch
) == CharacterClass::word
) && IsPunctuation(ch
);
// Move `pos` backwards to the start of the preceding word part.
// Visible steps: step back one character; skip backwards over any run of word
// part separators; then classify the character found (lower case, upper case,
// digit, punctuation, space, non-ASCII) and walk backwards while characters stay
// in that class. Each branch steps forward again by one character when the walk
// ended on a character outside the class, so the result sits on the first
// character of the run.
// NOTE(review): the guarding conditions, the final fallback branch header and
// the return statement (listing lines 2706, 2712-2714, 2747, 2749-2753) are not
// visible here.
2705 Sci::Position
Document::WordPartLeft(Sci::Position pos
) const {
2707 pos
-= CharacterBefore(pos
).widthBytes
;
2708 CharacterExtracted ceStart
= CharacterAfter(pos
);
2709 if (IsWordPartSeparator(ceStart
.character
)) {
2710 while (pos
> 0 && IsWordPartSeparator(CharacterAfter(pos
).character
)) {
2711 pos
-= CharacterBefore(pos
).widthBytes
;
2715 ceStart
= CharacterAfter(pos
);
2716 pos
-= CharacterBefore(pos
).widthBytes
;
2717 if (IsLowerCase(ceStart
.character
)) {
2718 while (pos
> 0 && IsLowerCase(CharacterAfter(pos
).character
))
2719 pos
-= CharacterBefore(pos
).widthBytes
;
2720 if (!IsUpperCase(CharacterAfter(pos
).character
) && !IsLowerCase(CharacterAfter(pos
).character
))
2721 pos
+= CharacterAfter(pos
).widthBytes
;
2722 } else if (IsUpperCase(ceStart
.character
)) {
2723 while (pos
> 0 && IsUpperCase(CharacterAfter(pos
).character
))
2724 pos
-= CharacterBefore(pos
).widthBytes
;
2725 if (!IsUpperCase(CharacterAfter(pos
).character
))
2726 pos
+= CharacterAfter(pos
).widthBytes
;
2727 } else if (IsADigit(ceStart
.character
)) {
2728 while (pos
> 0 && IsADigit(CharacterAfter(pos
).character
))
2729 pos
-= CharacterBefore(pos
).widthBytes
;
2730 if (!IsADigit(CharacterAfter(pos
).character
))
2731 pos
+= CharacterAfter(pos
).widthBytes
;
2732 } else if (IsPunctuation(ceStart
.character
)) {
2733 while (pos
> 0 && IsPunctuation(CharacterAfter(pos
).character
))
2734 pos
-= CharacterBefore(pos
).widthBytes
;
2735 if (!IsPunctuation(CharacterAfter(pos
).character
))
2736 pos
+= CharacterAfter(pos
).widthBytes
;
2737 } else if (IsASpace(ceStart
.character
)) {
2738 while (pos
> 0 && IsASpace(CharacterAfter(pos
).character
))
2739 pos
-= CharacterBefore(pos
).widthBytes
;
2740 if (!IsASpace(CharacterAfter(pos
).character
))
2741 pos
+= CharacterAfter(pos
).widthBytes
;
2742 } else if (!IsASCII(ceStart
.character
)) {
2743 while (pos
> 0 && !IsASCII(CharacterAfter(pos
).character
))
2744 pos
-= CharacterBefore(pos
).widthBytes
;
2745 if (IsASCII(CharacterAfter(pos
).character
))
2746 pos
+= CharacterAfter(pos
).widthBytes
;
2748 pos
+= CharacterAfter(pos
).widthBytes
;
// Move `pos` forwards past the word part that starts at or after it.
// Visible steps: skip forwards over any run of word part separators; then
// classify the character found and advance while characters stay in the same
// class (non-ASCII, lower case, digit, punctuation, space). For an upper case
// start, a following lower case character means a capitalised word (advance
// over the capital and then the lower case run); the other upper case path
// advances over the run of capitals and steps back one character when that run
// is followed by lower case, leaving the last capital for the next part.
// All forward walks are bounded by LengthNoExcept().
// NOTE(review): the `else` joining the two upper case paths, the final
// fallback branch header and the return statement (listing lines 2774, 2789,
// 2792) are not visible here.
2755 Sci::Position
Document::WordPartRight(Sci::Position pos
) const {
2756 CharacterExtracted ceStart
= CharacterAfter(pos
);
2757 const Sci::Position length
= LengthNoExcept();
2758 if (IsWordPartSeparator(ceStart
.character
)) {
2759 while (pos
< length
&& IsWordPartSeparator(CharacterAfter(pos
).character
))
2760 pos
+= CharacterAfter(pos
).widthBytes
;
2761 ceStart
= CharacterAfter(pos
);
2763 if (!IsASCII(ceStart
.character
)) {
2764 while (pos
< length
&& !IsASCII(CharacterAfter(pos
).character
))
2765 pos
+= CharacterAfter(pos
).widthBytes
;
2766 } else if (IsLowerCase(ceStart
.character
)) {
2767 while (pos
< length
&& IsLowerCase(CharacterAfter(pos
).character
))
2768 pos
+= CharacterAfter(pos
).widthBytes
;
2769 } else if (IsUpperCase(ceStart
.character
)) {
2770 if (IsLowerCase(CharacterAfter(pos
+ ceStart
.widthBytes
).character
)) {
2771 pos
+= CharacterAfter(pos
).widthBytes
;
2772 while (pos
< length
&& IsLowerCase(CharacterAfter(pos
).character
))
2773 pos
+= CharacterAfter(pos
).widthBytes
;
2775 while (pos
< length
&& IsUpperCase(CharacterAfter(pos
).character
))
2776 pos
+= CharacterAfter(pos
).widthBytes
;
2778 if (IsLowerCase(CharacterAfter(pos
).character
) && IsUpperCase(CharacterBefore(pos
).character
))
2779 pos
-= CharacterBefore(pos
).widthBytes
;
2780 } else if (IsADigit(ceStart
.character
)) {
2781 while (pos
< length
&& IsADigit(CharacterAfter(pos
).character
))
2782 pos
+= CharacterAfter(pos
).widthBytes
;
2783 } else if (IsPunctuation(ceStart
.character
)) {
2784 while (pos
< length
&& IsPunctuation(CharacterAfter(pos
).character
))
2785 pos
+= CharacterAfter(pos
).widthBytes
;
2786 } else if (IsASpace(ceStart
.character
)) {
2787 while (pos
< length
&& IsASpace(CharacterAfter(pos
).character
))
2788 pos
+= CharacterAfter(pos
).widthBytes
;
2790 pos
+= CharacterAfter(pos
).widthBytes
;
// Extend a position across a run of bytes that share the style found at the
// starting `pos`. One loop scans while pos > 0, the other while pos is below
// LengthNoExcept(); both stop when the style changes or, with `singleLine` set,
// when an end-of-line character is reached.
// NOTE(review): the test that selects a loop (presumably on `delta`), the loop
// bodies and the return statement are not visible in this listing.
2795 Sci::Position
Document::ExtendStyleRange(Sci::Position pos
, int delta
, bool singleLine
) noexcept
{
2796 const char sStart
= cb
.StyleAt(pos
);
2798 while (pos
> 0 && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsEOLCharacter(cb
.CharAt(pos
))))
2802 while (pos
< (LengthNoExcept()) && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsEOLCharacter(cb
.CharAt(pos
))))
// NOTE(review): the body (listing lines 2809-2829) is not visible here.
// Document::BraceMatch uses the result as the character to seek as the partner
// of the brace it was given.
2808 static char BraceOpposite(char ch
) noexcept
{
// Search for the brace that pairs with the one at `position`.
// The brace character and its style index are read at `position`, and the
// character to seek comes from BraceOpposite. Scanning starts at `startPos`
// when `useStartPos` is set, otherwise at the neighbouring position in the
// search direction, and continues while the position stays inside the
// document. A character is only considered when it lies beyond GetEndStyled()
// or has the same style index as the original brace. The scan also stops when
// NextPosition fails to move. The second parameter (maxReStyle) is unused.
// NOTE(review): the direction/depth bookkeeping and the return statements are
// not visible in this listing.
2831 // TODO: should be able to extend styled region to find matching brace
2832 Sci::Position
Document::BraceMatch(Sci::Position position
, Sci::Position
/*maxReStyle*/, Sci::Position startPos
, bool useStartPos
) noexcept
{
2833 const char chBrace
= CharAt(position
);
2834 const char chSeek
= BraceOpposite(chBrace
);
2837 const int styBrace
= StyleIndexAt(position
);
2839 if (chBrace
== '(' || chBrace
== '[' || chBrace
== '{' || chBrace
== '<')
2842 position
= useStartPos
? startPos
: NextPosition(position
, direction
);
2843 while ((position
>= 0) && (position
< LengthNoExcept())) {
2844 const char chAtPos
= CharAt(position
);
2845 const int styAtPos
= StyleIndexAt(position
);
2846 if ((position
> GetEndStyled()) || (styAtPos
== styBrace
)) {
2847 if (chAtPos
== chBrace
)
2849 if (chAtPos
== chSeek
)
2854 const Sci::Position positionBeforeMove
= position
;
2855 position
= NextPosition(position
, direction
);
2856 if (position
== positionBeforeMove
)
2863 * Implementation of RegexSearchBase for the default built-in regular expression engine
// RegexSearchBase implementation. It is constructed from a CharClassify table
// that initialises its `search` member, overrides FindText and
// SubstituteByPosition, and keeps a `substituted` string.
// NOTE(review): access specifiers and the declaration of `search` are not
// visible in this listing.
2865 class BuiltinRegex
: public RegexSearchBase
{
2867 explicit BuiltinRegex(CharClassify
*charClassTable
) : search(charClassTable
) {}
2869 Sci::Position
FindText(Document
*doc
, Sci::Position minPos
, Sci::Position maxPos
, const char *s
,
2870 bool caseSensitive
, bool word
, bool wordStart
, FindOption flags
,
2871 Sci::Position
*length
) override
;
2873 const char *SubstituteByPosition(Document
*doc
, const char *text
, Sci::Position
*length
) override
;
2877 std::string substituted
;
2883 * RESearchRange keeps track of search range.
// Describes a regular expression search range over a document.
// The constructor sets `increment` to 1 when minPos <= maxPos and to -1
// otherwise, moves both end points outside any character with
// MovePositionOutsideChar, and records the lines holding them.
// lineRangeBreak is one step past lineRangeEnd in the direction of travel, so
// line loops can run until `line != lineRangeBreak`.
// LineRange() narrows a line's [lineStartPos, lineEndPos] to the search range
// on the first and last lines, with the roles of startPos and endPos swapped
// when increment is not 1.
// NOTE(review): the declaration of `increment`, one assignment in the forward
// branch of LineRange and its return statement are not visible in this listing.
2885 class RESearchRange
{
2887 const Document
*doc
;
2889 Sci::Position startPos
;
2890 Sci::Position endPos
;
2891 Sci::Line lineRangeStart
;
2892 Sci::Line lineRangeEnd
;
2893 Sci::Line lineRangeBreak
;
2894 RESearchRange(const Document
*doc_
, Sci::Position minPos
, Sci::Position maxPos
) noexcept
: doc(doc_
) {
2895 increment
= (minPos
<= maxPos
) ? 1 : -1;
2897 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2898 // but just in case, move them.
2899 startPos
= doc
->MovePositionOutsideChar(minPos
, 1, true);
2900 endPos
= doc
->MovePositionOutsideChar(maxPos
, 1, true);
2902 lineRangeStart
= doc
->SciLineFromPosition(startPos
);
2903 lineRangeEnd
= doc
->SciLineFromPosition(endPos
);
2904 lineRangeBreak
= lineRangeEnd
+ increment
;
2906 Range
LineRange(Sci::Line line
, Sci::Position lineStartPos
, Sci::Position lineEndPos
) const noexcept
{
2907 Range
range(lineStartPos
, lineEndPos
);
2908 if (increment
== 1) {
2909 if (line
== lineRangeStart
)
2910 range
.start
= startPos
;
2911 if (line
== lineRangeEnd
)
2914 if (line
== lineRangeEnd
)
2915 range
.start
= endPos
;
2916 if (line
== lineRangeStart
)
2917 range
.end
= startPos
;
2923 // Define a way for the Regular Expression code to access the document
// CharacterIndexer backed by a Document and an `end` limit. CharAt tests for
// indexes below zero or at/after `end` before reading from the document;
// MovePositionOutsideChar forwards to the document with its third argument
// fixed to false.
// NOTE(review): the member declarations and the value returned for an
// out-of-range index are not visible in this listing.
2924 class DocumentIndexer final
: public CharacterIndexer
{
2928 DocumentIndexer(Document
*pdoc_
, Sci::Position end_
) noexcept
:
2929 pdoc(pdoc_
), end(end_
) {
2932 char CharAt(Sci::Position index
) const noexcept override
{
2933 if (index
< 0 || index
>= end
)
2936 return pdoc
->CharAt(index
);
2938 Sci::Position
MovePositionOutsideChar(Sci::Position pos
, Sci::Position moveDir
) const noexcept override
{
2939 return pdoc
->MovePositionOutsideChar(pos
, moveDir
, false);
2943 #ifndef NO_CXX11_REGEX
// Bidirectional iterator over the bytes of a Document (value_type char), used
// as the Iterator argument of MatchOnLines. Dereferencing reads
// doc->CharAt(position); two iterators are equal when both the document and the
// position match.
// NOTE(review): the bodies of the increment/decrement operators and of Pos() /
// PosRoundUp() are not visible in this listing.
2945 class ByteIterator
{
2947 using iterator_category
= std::bidirectional_iterator_tag
;
2948 using value_type
= char;
2949 using difference_type
= ptrdiff_t;
2950 using pointer
= char*;
2951 using reference
= char&;
2953 const Document
*doc
;
2954 Sci::Position position
;
2956 explicit ByteIterator(const Document
*doc_
=nullptr, Sci::Position position_
=0) noexcept
:
2957 doc(doc_
), position(position_
) {
2959 char operator*() const noexcept
{
2960 return doc
->CharAt(position
);
2962 ByteIterator
&operator++() noexcept
{
2966 ByteIterator
operator++(int) noexcept
{
2967 ByteIterator
retVal(*this);
2971 ByteIterator
&operator--() noexcept
{
2975 bool operator==(const ByteIterator
&other
) const noexcept
{
2976 return doc
== other
.doc
&& position
== other
.position
;
2978 bool operator!=(const ByteIterator
&other
) const noexcept
{
2979 return doc
!= other
.doc
|| position
!= other
.position
;
2981 Sci::Position
Pos() const noexcept
{
2984 Sci::Position
PosRoundUp() const noexcept
{
2989 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2990 // Would be better to use sizeof(wchar_t) or similar to differentiate
2991 // but easier for now to hard-code platforms.
2992 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2993 // appear to allow specializing basic_regex over these.
2996 #define WCHAR_T_IS_16 1
2998 #define WCHAR_T_IS_16 0
3003 // On Windows, report non-BMP characters as 2 separate surrogates as that
3004 // matches wregex since it is based on wchar_t.
// Bidirectional iterator that presents a UTF-8 document as wchar_t units, with
// room for two buffered units per character. `position` is the byte position of
// the current character and `characterIndex` selects a unit within `buffered`.
// Incrementing moves to the next buffered unit when one remains, otherwise it
// advances `position` by the character's byte width. Comparisons use only doc,
// position and characterIndex. ReadCharacter() extracts the character at
// `position`, records its byte width and fills `buffered`, using
// UTF16FromUTF32Character for the unit count.
// NOTE(review): many statements (constructor body, the rest of the
// increment/decrement logic, the Pos()/PosRoundUp() conditions, the
// replacement-character handling) are not visible in this listing.
3005 class UTF8Iterator
{
3006 // These 3 fields determine the iterator position and are used for comparisons
3007 const Document
*doc
;
3008 Sci::Position position
;
3009 size_t characterIndex
;
3010 // Remaining fields are derived from the determining fields so are excluded in comparisons
3011 unsigned int lenBytes
;
3012 size_t lenCharacters
;
3013 wchar_t buffered
[2];
3015 using iterator_category
= std::bidirectional_iterator_tag
;
3016 using value_type
= wchar_t;
3017 using difference_type
= ptrdiff_t;
3018 using pointer
= wchar_t*;
3019 using reference
= wchar_t&;
3021 explicit UTF8Iterator(const Document
*doc_
=nullptr, Sci::Position position_
=0) noexcept
:
3022 doc(doc_
), position(position_
), characterIndex(0), lenBytes(0), lenCharacters(0), buffered
{} {
3029 wchar_t operator*() const noexcept
{
3030 assert(lenCharacters
!= 0);
3031 return buffered
[characterIndex
];
3033 UTF8Iterator
&operator++() noexcept
{
3034 if ((characterIndex
+ 1) < (lenCharacters
)) {
3037 position
+= lenBytes
;
3043 UTF8Iterator
operator++(int) noexcept
{
3044 UTF8Iterator
retVal(*this);
3045 if ((characterIndex
+ 1) < (lenCharacters
)) {
3048 position
+= lenBytes
;
3054 UTF8Iterator
&operator--() noexcept
{
3055 if (characterIndex
) {
3058 position
= doc
->NextPosition(position
, -1);
3060 characterIndex
= lenCharacters
- 1;
3064 bool operator==(const UTF8Iterator
&other
) const noexcept
{
3065 // Only test the determining fields, not the character widths and values derived from this
3066 return doc
== other
.doc
&&
3067 position
== other
.position
&&
3068 characterIndex
== other
.characterIndex
;
3070 bool operator!=(const UTF8Iterator
&other
) const noexcept
{
3071 // Only test the determining fields, not the character widths and values derived from this
3072 return doc
!= other
.doc
||
3073 position
!= other
.position
||
3074 characterIndex
!= other
.characterIndex
;
3076 Sci::Position
Pos() const noexcept
{
3079 Sci::Position
PosRoundUp() const noexcept
{
3081 return position
+ lenBytes
; // Force to end of character
3086 void ReadCharacter() noexcept
{
3087 const CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
3088 lenBytes
= charExtracted
.widthBytes
;
3089 if (charExtracted
.character
== unicodeReplacementChar
) {
3091 buffered
[0] = static_cast<wchar_t>(charExtracted
.character
);
3093 lenCharacters
= UTF16FromUTF32Character(charExtracted
.character
, buffered
);
3100 // On Unix, report non-BMP characters as single characters
// Bidirectional iterator that presents a UTF-8 document as whole characters in
// wchar_t. Dereferencing extracts the character at `position`; increment and
// decrement move with doc->NextPosition in steps of +1 and -1; two iterators
// are equal when both the document and the position match.
// NOTE(review): the return statements of the operators and the bodies of Pos()
// / PosRoundUp() are not visible in this listing.
3102 class UTF8Iterator
{
3103 const Document
*doc
;
3104 Sci::Position position
;
3106 using iterator_category
= std::bidirectional_iterator_tag
;
3107 using value_type
= wchar_t;
3108 using difference_type
= ptrdiff_t;
3109 using pointer
= wchar_t*;
3110 using reference
= wchar_t&;
3112 explicit UTF8Iterator(const Document
*doc_
=nullptr, Sci::Position position_
=0) noexcept
:
3113 doc(doc_
), position(position_
) {
3115 wchar_t operator*() const noexcept
{
3116 const CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
3117 return charExtracted
.character
;
3119 UTF8Iterator
&operator++() noexcept
{
3120 position
= doc
->NextPosition(position
, 1);
3123 UTF8Iterator
operator++(int) noexcept
{
3124 UTF8Iterator
retVal(*this);
3125 position
= doc
->NextPosition(position
, 1);
3128 UTF8Iterator
&operator--() noexcept
{
3129 position
= doc
->NextPosition(position
, -1);
3132 bool operator==(const UTF8Iterator
&other
) const noexcept
{
3133 return doc
== other
.doc
&& position
== other
.position
;
3135 bool operator!=(const UTF8Iterator
&other
) const noexcept
{
3136 return doc
!= other
.doc
|| position
!= other
.position
;
3138 Sci::Position
Pos() const noexcept
{
3141 Sci::Position
PosRoundUp() const noexcept
{
// Work out the std::regex match flags for searching [startPos, endPos) inside a
// line that spans [lineStartPos, lineEndPos). Starting from match_default:
// when the range does not begin at the line start, the _LIBCPP_VERSION branch
// adds match_not_bol, plus match_not_bow when startPos is not a word start;
// match_prev_avail is added in a branch whose preprocessor condition is not
// visible here. When the range does not finish at the line end, match_not_eol
// is added, plus match_not_eow when endPos is not a word end.
// NOTE(review): the #else/#endif lines and the return statement are not
// visible in this listing.
3148 std::regex_constants::match_flag_type
MatchFlags(const Document
*doc
, Sci::Position startPos
, Sci::Position endPos
, Sci::Position lineStartPos
, Sci::Position lineEndPos
) {
3149 std::regex_constants::match_flag_type flagsMatch
= std::regex_constants::match_default
;
3150 if (startPos
!= lineStartPos
) {
3151 #ifdef _LIBCPP_VERSION
3152 flagsMatch
|= std::regex_constants::match_not_bol
;
3153 if (!doc
->IsWordStartAt(startPos
)) {
3154 flagsMatch
|= std::regex_constants::match_not_bow
;
3157 flagsMatch
|= std::regex_constants::match_prev_avail
;
3160 if (endPos
!= lineEndPos
) {
3161 flagsMatch
|= std::regex_constants::match_not_eol
;
3162 if (!doc
->IsWordEndAt(endPos
)) {
3163 flagsMatch
|= std::regex_constants::match_not_eow
;
// Run a compiled std::regex over the search range and copy the capture
// positions into `search`. With REGEX_MULTILINE defined, a single regex_search
// covers the whole range. The other visible path walks the lines from
// lineRangeStart until lineRangeBreak in steps of resr.increment, narrows each
// line with resr.LineRange, derives match flags with MatchFlags and iterates
// the matches on that line with std::regex_iterator. At the end, up to
// RESearch::MAXTAG sub-matches are stored: bopat from each sub-match start
// (Pos()) and eopat from each sub-match end (PosRoundUp()).
// NOTE(review): the preprocessor #else/#endif, the body of the match loop and
// the return statement are not visible in this listing.
3169 template<typename Iterator
, typename Regex
>
3170 bool MatchOnLines(const Document
*doc
, const Regex
®exp
, const RESearchRange
&resr
, RESearch
&search
) {
3171 std::match_results
<Iterator
> match
;
3173 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3174 // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3175 // The std::regex::multiline option was added to C++17 to improve behaviour but
3176 // has not been implemented by compiler runtimes with MSVC always in multiline
3177 // mode and libc++ and libstdc++ always in single-line mode.
3178 // If multiline regex worked well then the line by line iteration could be removed
3179 // for the forwards case and replaced with the following:
3180 #ifdef REGEX_MULTILINE
3181 const Sci::Position lineStartPos
= doc
->LineStart(resr
.lineRangeStart
);
3182 const Sci::Position lineEndPos
= doc
->LineEnd(resr
.lineRangeEnd
);
3183 Iterator
itStart(doc
, resr
.startPos
);
3184 Iterator
itEnd(doc
, resr
.endPos
);
3185 const std::regex_constants::match_flag_type flagsMatch
= MatchFlags(doc
, resr
.startPos
, resr
.endPos
, lineStartPos
, lineEndPos
);
3186 const bool matched
= std::regex_search(itStart
, itEnd
, match
, regexp
, flagsMatch
);
3189 bool matched
= false;
3190 for (Sci::Line line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
3191 const Sci::Position lineStartPos
= doc
->LineStart(line
);
3192 const Sci::Position lineEndPos
= doc
->LineEnd(line
);
3193 const Range lineRange
= resr
.LineRange(line
, lineStartPos
, lineEndPos
);
3194 Iterator
itStart(doc
, lineRange
.start
);
3195 Iterator
itEnd(doc
, lineRange
.end
);
3196 const std::regex_constants::match_flag_type flagsMatch
= MatchFlags(doc
, lineRange
.start
, lineRange
.end
, lineStartPos
, lineEndPos
);
3197 std::regex_iterator
<Iterator
> it(itStart
, itEnd
, regexp
, flagsMatch
);
3198 for (const std::regex_iterator
<Iterator
> last
; it
!= last
; ++it
) {
3201 if (resr
.increment
> 0) {
3211 for (size_t co
= 0; co
< match
.size() && co
< RESearch::MAXTAG
; co
++) {
3212 search
.bopat
[co
] = match
[co
].first
.Pos();
3213 search
.eopat
[co
] = match
[co
].second
.PosRoundUp();
// Search the document with a std::regex built from pattern `s`.
// The regex uses ECMAScript syntax, with icase and (where REGEX_MULTILINE is
// defined and the compiler is not MSVC) multiline added to the flags. For a
// UTF-8 document (CpUtf8) the pattern is converted to a wide string and matched
// through UTF8Iterator; the other visible path assigns the byte pattern and
// matches through ByteIterator. The match position is taken from
// search.bopat[0] and *length from eopat[0] - bopat[0]; posMatch starts at -1.
// std::regex_error is caught separately from other failures.
// NOTE(review): the try framing, regex declarations, the condition applying
// icase, the test on `matched` and the return statements are not visible in
// this listing.
3219 Sci::Position
Cxx11RegexFindText(const Document
*doc
, Sci::Position minPos
, Sci::Position maxPos
, const char *s
,
3220 bool caseSensitive
, Sci::Position
*length
, RESearch
&search
) {
3221 const RESearchRange
resr(doc
, minPos
, maxPos
);
3224 std::regex::flag_type flagsRe
= std::regex::ECMAScript
;
3225 // Flags that appear to have no effect:
3226 // | std::regex::collate | std::regex::extended;
3228 flagsRe
= flagsRe
| std::regex::icase
;
3230 #if defined(REGEX_MULTILINE) && !defined(_MSC_VER)
3231 flagsRe
= flagsRe
| std::regex::multiline
;
3234 // Clear the RESearch so can fill in matches
3237 bool matched
= false;
3238 if (CpUtf8
== doc
->dbcsCodePage
) {
3239 const std::wstring ws
= WStringFromUTF8(s
);
3241 regexp
.assign(ws
, flagsRe
);
3242 matched
= MatchOnLines
<UTF8Iterator
>(doc
, regexp
, resr
, search
);
3245 regexp
.assign(s
, flagsRe
);
3246 matched
= MatchOnLines
<ByteIterator
>(doc
, regexp
, resr
, search
);
3249 Sci::Position posMatch
= -1;
3251 posMatch
= search
.bopat
[0];
3252 *length
= search
.eopat
[0] - search
.bopat
[0];
3254 // Example - search in doc/ScintillaHistory.html for
3255 // [[:upper:]]eta[[:space:]]
3256 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3257 //const double durSearch = ep.Duration(true);
3258 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3260 } catch (std::regex_error
&) {
3261 // Failed to create regular expression
3264 // Failed in some other way
// Search doc between minPos and maxPos for the regular expression s.
// When FindOption::Cxx11RegEx is set in flags (and NO_CXX11_REGEX is not
// defined) the work is handed to Cxx11RegexFindText. Otherwise the pattern is
// compiled and executed with the RESearch object named search, one document
// line at a time, stepping through the lines by resr.increment.
// On entry *length is the pattern length that is passed to search.Compile.
// pos starts at -1 and lenRet at 0; a match sets pos to search.bopat[0] and
// lenRet to the length of that match.
// The two unnamed bool parameters are not used.
// NOTE(review): the handling of errmsg, the else branches, the tests of
// success and the final return are not visible in this view.
3273 Sci::Position
BuiltinRegex::FindText(Document
*doc
, Sci::Position minPos
, Sci::Position maxPos
, const char *s
,
3274 bool caseSensitive
, bool, bool, FindOption flags
,
3275 Sci::Position
*length
) {
3277 #ifndef NO_CXX11_REGEX
// Delegate to the standard library implementation when it is requested.
3278 if (FlagSet(flags
, FindOption::Cxx11RegEx
)) {
3279 return Cxx11RegexFindText(doc
, minPos
, maxPos
, s
,
3280 caseSensitive
, length
, search
);
// Describes the search range in terms of positions and lines.
3284 const RESearchRange
resr(doc
, minPos
, maxPos
);
3286 const bool posix
= FlagSet(flags
, FindOption::Posix
);
// Compile reports a problem through a message string.
3288 const char *errmsg
= search
.Compile(s
, *length
, caseSensitive
, posix
);
3292 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3293 // Replace first '.' with '-' in each property file variable reference:
3294 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3295 // Replace: $(\1-\2)
3296 Sci::Position pos
= -1;
3297 Sci::Position lenRet
= 0;
// Detect patterns anchored to the start of a line (leading caret) or to the
// end of a line (trailing dollar that is not escaped by a backslash).
3298 const bool searchforLineStart
= s
[0] == '^';
// NOTE(review): with *length equal to 0 this indexes one element before s -
// confirm that an empty pattern cannot reach this statement.
3299 const char searchEnd
= s
[*length
- 1];
3300 const char searchEndPrev
= (*length
> 1) ? s
[*length
- 2] : '\0';
3301 const bool searchforLineEnd
= (searchEnd
== '$') && (searchEndPrev
!= '\\');
// Visit each line of the range in the direction given by resr.increment.
3302 for (Sci::Line line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
// lineStartPos / lineEndPos stay fixed for SetLineRange; startOfLine and
// endOfLine are narrowed below to the part of the line inside the range.
3303 const Sci::Position lineStartPos
= doc
->LineStart(line
);
3304 const Sci::Position lineEndPos
= doc
->LineEnd(line
);
3305 Sci::Position startOfLine
= lineStartPos
;
3306 Sci::Position endOfLine
= lineEndPos
;
// Forward search: the first line begins at resr.startPos and the last line
// stops at resr.endPos.
3307 if (resr
.increment
== 1) {
3308 if (line
== resr
.lineRangeStart
) {
3309 if ((resr
.startPos
!= startOfLine
) && searchforLineStart
)
3310 continue; // Can't match start of line if start position after start of line
3311 startOfLine
= resr
.startPos
;
3313 if (line
== resr
.lineRangeEnd
) {
3314 if ((resr
.endPos
!= endOfLine
) && searchforLineEnd
)
3315 continue; // Can't match end of line if end position before end of line
3316 endOfLine
= resr
.endPos
;
// Mirror of the clipping above with startPos and endPos exchanged; presumably
// the branch taken for backward searches (the else is not visible here).
3319 if (line
== resr
.lineRangeEnd
) {
3320 if ((resr
.endPos
!= startOfLine
) && searchforLineStart
)
3321 continue; // Can't match start of line if end position after start of line
3322 startOfLine
= resr
.endPos
;
3324 if (line
== resr
.lineRangeStart
) {
3325 if ((resr
.startPos
!= endOfLine
) && searchforLineEnd
)
3326 continue; // Can't match end of line if start position before end of line
3327 endOfLine
= resr
.startPos
;
// Run the compiled pattern over the clipped part of this line; the full line
// extent is supplied separately through SetLineRange.
3331 const DocumentIndexer
di(doc
, endOfLine
);
3332 search
.SetLineRange(lineStartPos
, lineEndPos
);
3333 int success
= search
.Execute(di
, startOfLine
, endOfLine
);
3335 Sci::Position endPos
= search
.eopat
[0];
3336 // There can be only one start of a line, so no need to look for last match in line
3337 if ((resr
.increment
== -1) && !searchforLineStart
) {
3338 // Check for the last match on this line.
3339 while (success
&& (endPos
< endOfLine
)) {
// Keep copies of the current match positions so that they can be put back
// into search further down.
3340 const RESearch::MatchPositions bopat
= search
.bopat
;
3341 const RESearch::MatchPositions eopat
= search
.eopat
;
// When pos equals the start of the saved match, step to the next position so
// that the following Execute begins after it.
3343 if (pos
== bopat
[0]) {
3345 pos
= doc
->NextPosition(pos
, 1);
3347 success
= search
.Execute(di
, pos
, endOfLine
);
3349 endPos
= search
.eopat
[0];
// Put back the positions saved before the retry; presumably done only when
// the retry found nothing (the condition is not visible here).
3351 search
.bopat
= bopat
;
3352 search
.eopat
= eopat
;
// Record the start and length of the whole match.
3356 pos
= search
.bopat
[0];
3357 lenRet
= endPos
- pos
;
// Build replacement text from the template in text, which is *length
// characters long, and store it in substituted.
// A backslash followed by a digit 0-9 is replaced by the document text between
// search.bopat[digit] and search.eopat[digit]; nothing is added when that
// extent is not positive. Other backslash sequences append a single character
// such as alert, backspace, form feed, newline, carriage return, tab, vertical
// tab or a backslash. Characters that are not a backslash are copied as they
// are.
// On return *length holds the length of substituted and the result is
// substituted.c_str(), so the pointer refers to storage owned by substituted.
// NOTE(review): the case labels that select each escape and the else branches
// are not visible in this view.
3365 const char *BuiltinRegex::SubstituteByPosition(Document
*doc
, const char *text
, Sci::Position
*length
) {
// Discard the result of any earlier call.
3366 substituted
.clear();
3367 for (Sci::Position j
= 0; j
< *length
; j
++) {
3368 if (text
[j
] == '\\') {
// Consume the character after the backslash.
// NOTE(review): when the backslash is the final character this reads
// text[*length]; assumes that element is readable, for example a terminating
// NUL - confirm against callers.
3369 const char chNext
= text
[++j
];
3370 if (chNext
>= '0' && chNext
<= '9') {
// The digit selects which pair of match positions to copy from the document.
3371 const unsigned int patNum
= chNext
- '0';
3372 const Sci::Position startPos
= search
.bopat
[patNum
];
3373 const Sci::Position len
= search
.eopat
[patNum
] - startPos
;
3374 if (len
> 0) { // Will be null if try for a match that did not occur
// Grow the buffer by len and copy the document text straight into the new
// space at its end.
3375 const size_t size
= substituted
.length();
3376 substituted
.resize(size
+ len
);
3377 doc
->GetCharRange(substituted
.data() + size
, startPos
, len
);
// Each push_back below appends the single character for one escape sequence.
3382 substituted
.push_back('\a');
3385 substituted
.push_back('\b');
3388 substituted
.push_back('\f');
3391 substituted
.push_back('\n');
3394 substituted
.push_back('\r');
3397 substituted
.push_back('\t');
3400 substituted
.push_back('\v');
3403 substituted
.push_back('\\');
3406 substituted
.push_back('\\');
// Not a backslash: copy the character unchanged.
3411 substituted
.push_back(text
[j
]);
// Report the length of the generated text and return its buffer.
3414 *length
= substituted
.length();
3415 return substituted
.c_str();
3418 #ifndef SCI_OWNREGEX
3420 RegexSearchBase
*Scintilla::Internal::CreateRegexSearch(CharClassify
*charClassTable
) {
3421 return new BuiltinRegex(charClassTable
);