1 // Scintilla source code edit control
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
18 #include <forward_list>
23 #ifndef NO_CXX11_REGEX
31 #include "Scintilla.h"
33 #include "CharacterSet.h"
34 #include "CharacterCategory.h"
36 #include "SplitVector.h"
37 #include "Partitioning.h"
38 #include "RunStyles.h"
39 #include "CellBuffer.h"
41 #include "CharClassify.h"
42 #include "Decoration.h"
43 #include "CaseFolder.h"
46 #include "UniConversion.h"
47 #include "ElapsedPeriod.h"
49 using namespace Scintilla
;
51 void LexInterface::Colourise(Sci::Position start
, Sci::Position end
) {
52 if (pdoc
&& instance
&& !performingStyle
) {
53 // Protect against reentrance, which may occur, for example, when
54 // fold points are discovered while performing styling and the folding
55 // code looks for child lines which may trigger styling.
56 performingStyle
= true;
58 const Sci::Position lengthDoc
= pdoc
->Length();
61 const Sci::Position len
= end
- start
;
63 PLATFORM_ASSERT(len
>= 0);
64 PLATFORM_ASSERT(start
+ len
<= lengthDoc
);
68 styleStart
= pdoc
->StyleAt(start
- 1);
71 instance
->Lex(start
, len
, styleStart
, pdoc
);
72 instance
->Fold(start
, len
, styleStart
, pdoc
);
75 performingStyle
= false;
79 int LexInterface::LineEndTypesSupported() {
81 const int interfaceVersion
= instance
->Version();
82 if (interfaceVersion
>= lvSubStyles
) {
83 ILexerWithSubStyles
*ssinstance
= static_cast<ILexerWithSubStyles
*>(instance
);
84 return ssinstance
->LineEndTypesSupported();
90 ActionDuration::ActionDuration(double duration_
, double minDuration_
, double maxDuration_
) noexcept
:
91 duration(duration_
), minDuration(minDuration_
), maxDuration(maxDuration_
) {
94 void ActionDuration::AddSample(size_t numberActions
, double durationOfActions
) noexcept
{
95 // Only adjust for multiple actions to avoid instability
96 if (numberActions
< 8)
99 // Alpha value for exponential smoothing.
100 // Most recent value contributes 25% to smoothed value.
101 const double alpha
= 0.25;
103 const double durationOne
= durationOfActions
/ numberActions
;
104 duration
= Sci::clamp(alpha
* durationOne
+ (1.0 - alpha
) * duration
,
105 minDuration
, maxDuration
);
108 double ActionDuration::Duration() const noexcept
{
112 Document::Document(int options
) :
113 cb((options
& SC_DOCUMENTOPTION_STYLES_NONE
) == 0, (options
& SC_DOCUMENTOPTION_TEXT_LARGE
) != 0),
114 durationStyleOneLine(0.00001, 0.000001, 0.0001) {
117 eolMode
= SC_EOL_CRLF
;
121 dbcsCodePage
= SC_CP_UTF8
;
122 lineEndBitSet
= SC_LINE_END_TYPE_DEFAULT
;
125 enteredModification
= 0;
127 enteredReadOnlyCount
= 0;
128 insertionSet
= false;
131 actualIndentInChars
= 8;
134 backspaceUnindents
= false;
136 matchesValid
= false;
138 perLineData
[ldMarkers
].reset(new LineMarkers());
139 perLineData
[ldLevels
].reset(new LineLevels());
140 perLineData
[ldState
].reset(new LineState());
141 perLineData
[ldMargin
].reset(new LineAnnotation());
142 perLineData
[ldAnnotation
].reset(new LineAnnotation());
144 decorations
= DecorationListCreate(IsLarge());
147 cb
.SetUTF8Substance(SC_CP_UTF8
== dbcsCodePage
);
150 Document::~Document() {
151 for (const WatcherWithUserData
&watcher
: watchers
) {
152 watcher
.watcher
->NotifyDeleted(this, watcher
.userData
);
156 // Increase reference count and return its previous value.
157 int Document::AddRef() {
// Decrease reference count and return its new value.
// Delete the document if reference count reaches zero.
163 int SCI_METHOD
Document::Release() {
164 const int curRefCount
= --refCount
;
165 if (curRefCount
== 0)
170 void Document::Init() {
171 for (const std::unique_ptr
<PerLine
> &pl
: perLineData
) {
177 void Document::InsertLine(Sci::Line line
) {
178 for (const std::unique_ptr
<PerLine
> &pl
: perLineData
) {
180 pl
->InsertLine(line
);
184 void Document::RemoveLine(Sci::Line line
) {
185 for (const std::unique_ptr
<PerLine
> &pl
: perLineData
) {
187 pl
->RemoveLine(line
);
191 LineMarkers
*Document::Markers() const {
192 return static_cast<LineMarkers
*>(perLineData
[ldMarkers
].get());
195 LineLevels
*Document::Levels() const {
196 return static_cast<LineLevels
*>(perLineData
[ldLevels
].get());
199 LineState
*Document::States() const {
200 return static_cast<LineState
*>(perLineData
[ldState
].get());
203 LineAnnotation
*Document::Margins() const {
204 return static_cast<LineAnnotation
*>(perLineData
[ldMargin
].get());
207 LineAnnotation
*Document::Annotations() const {
208 return static_cast<LineAnnotation
*>(perLineData
[ldAnnotation
].get());
211 int Document::LineEndTypesSupported() const {
212 if ((SC_CP_UTF8
== dbcsCodePage
) && pli
)
213 return pli
->LineEndTypesSupported();
218 bool Document::SetDBCSCodePage(int dbcsCodePage_
) {
219 if (dbcsCodePage
!= dbcsCodePage_
) {
220 dbcsCodePage
= dbcsCodePage_
;
221 SetCaseFolder(nullptr);
222 cb
.SetLineEndTypes(lineEndBitSet
& LineEndTypesSupported());
223 cb
.SetUTF8Substance(SC_CP_UTF8
== dbcsCodePage
);
224 ModifiedAt(0); // Need to restyle whole document
231 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_
) {
232 if (lineEndBitSet
!= lineEndBitSet_
) {
233 lineEndBitSet
= lineEndBitSet_
;
234 const int lineEndBitSetActive
= lineEndBitSet
& LineEndTypesSupported();
235 if (lineEndBitSetActive
!= cb
.GetLineEndTypes()) {
237 cb
.SetLineEndTypes(lineEndBitSetActive
);
247 void Document::SetSavePoint() {
249 NotifySavePoint(true);
252 void Document::TentativeUndo() {
253 if (!TentativeActive())
256 if (enteredModification
== 0) {
257 enteredModification
++;
258 if (!cb
.IsReadOnly()) {
259 const bool startSavePoint
= cb
.IsSavePoint();
260 bool multiLine
= false;
261 const int steps
= cb
.TentativeSteps();
262 //Platform::DebugPrintf("Steps=%d\n", steps);
263 for (int step
= 0; step
< steps
; step
++) {
264 const Sci::Line prevLinesTotal
= LinesTotal();
265 const Action
&action
= cb
.GetUndoStep();
266 if (action
.at
== removeAction
) {
267 NotifyModified(DocModification(
268 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
269 } else if (action
.at
== containerAction
) {
270 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
271 dm
.token
= action
.position
;
274 NotifyModified(DocModification(
275 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
277 cb
.PerformUndoStep();
278 if (action
.at
!= containerAction
) {
279 ModifiedAt(action
.position
);
282 int modFlags
= SC_PERFORMED_UNDO
;
283 // With undo, an insertion action becomes a deletion notification
284 if (action
.at
== removeAction
) {
285 modFlags
|= SC_MOD_INSERTTEXT
;
286 } else if (action
.at
== insertAction
) {
287 modFlags
|= SC_MOD_DELETETEXT
;
290 modFlags
|= SC_MULTISTEPUNDOREDO
;
291 const Sci::Line linesAdded
= LinesTotal() - prevLinesTotal
;
294 if (step
== steps
- 1) {
295 modFlags
|= SC_LASTSTEPINUNDOREDO
;
297 modFlags
|= SC_MULTILINEUNDOREDO
;
299 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
300 linesAdded
, action
.data
.get()));
303 const bool endSavePoint
= cb
.IsSavePoint();
304 if (startSavePoint
!= endSavePoint
)
305 NotifySavePoint(endSavePoint
);
307 cb
.TentativeCommit();
309 enteredModification
--;
313 int Document::GetMark(Sci::Line line
) const {
314 return Markers()->MarkValue(line
);
317 Sci::Line
Document::MarkerNext(Sci::Line lineStart
, int mask
) const {
318 return Markers()->MarkerNext(lineStart
, mask
);
321 int Document::AddMark(Sci::Line line
, int markerNum
) {
322 if (line
>= 0 && line
<= LinesTotal()) {
323 const int prev
= Markers()->AddMark(line
, markerNum
, LinesTotal());
324 const DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, nullptr, line
);
332 void Document::AddMarkSet(Sci::Line line
, int valueSet
) {
333 if (line
< 0 || line
> LinesTotal()) {
336 unsigned int m
= valueSet
;
337 for (int i
= 0; m
; i
++, m
>>= 1) {
339 Markers()->AddMark(line
, i
, LinesTotal());
341 const DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, nullptr, line
);
345 void Document::DeleteMark(Sci::Line line
, int markerNum
) {
346 Markers()->DeleteMark(line
, markerNum
, false);
347 const DocModification
mh(SC_MOD_CHANGEMARKER
, LineStart(line
), 0, 0, nullptr, line
);
351 void Document::DeleteMarkFromHandle(int markerHandle
) {
352 Markers()->DeleteMarkFromHandle(markerHandle
);
353 DocModification
mh(SC_MOD_CHANGEMARKER
);
358 void Document::DeleteAllMarks(int markerNum
) {
359 bool someChanges
= false;
360 for (Sci::Line line
= 0; line
< LinesTotal(); line
++) {
361 if (Markers()->DeleteMark(line
, markerNum
, true))
365 DocModification
mh(SC_MOD_CHANGEMARKER
);
371 Sci::Line
Document::LineFromHandle(int markerHandle
) const {
372 return Markers()->LineFromHandle(markerHandle
);
375 Sci_Position SCI_METHOD
Document::LineStart(Sci_Position line
) const {
376 return cb
.LineStart(static_cast<Sci::Line
>(line
));
379 bool Document::IsLineStartPosition(Sci::Position position
) const {
380 return LineStart(LineFromPosition(position
)) == position
;
383 Sci_Position SCI_METHOD
Document::LineEnd(Sci_Position line
) const {
384 if (line
>= LinesTotal() - 1) {
385 return LineStart(line
+ 1);
387 Sci::Position position
= LineStart(line
+ 1);
388 if (SC_CP_UTF8
== dbcsCodePage
) {
389 const unsigned char bytes
[] = {
390 cb
.UCharAt(position
-3),
391 cb
.UCharAt(position
-2),
392 cb
.UCharAt(position
-1),
394 if (UTF8IsSeparator(bytes
)) {
395 return position
- UTF8SeparatorLength
;
397 if (UTF8IsNEL(bytes
+1)) {
398 return position
- UTF8NELLength
;
401 position
--; // Back over CR or LF
402 // When line terminator is CR+LF, may need to go back one more
403 if ((position
> LineStart(line
)) && (cb
.CharAt(position
- 1) == '\r')) {
410 void SCI_METHOD
Document::SetErrorStatus(int status
) {
411 // Tell the watchers an error has occurred.
412 for (const WatcherWithUserData
&watcher
: watchers
) {
413 watcher
.watcher
->NotifyErrorOccurred(this, watcher
.userData
, status
);
417 Sci_Position SCI_METHOD
Document::LineFromPosition(Sci_Position pos
) const {
418 return cb
.LineFromPosition(pos
);
421 Sci::Line
Document::SciLineFromPosition(Sci::Position pos
) const noexcept
{
422 // Avoids casting in callers for this very common function
423 return cb
.LineFromPosition(pos
);
426 Sci::Position
Document::LineEndPosition(Sci::Position position
) const {
427 return LineEnd(LineFromPosition(position
));
430 bool Document::IsLineEndPosition(Sci::Position position
) const {
431 return LineEnd(LineFromPosition(position
)) == position
;
434 bool Document::IsPositionInLineEnd(Sci::Position position
) const {
435 return position
>= LineEnd(LineFromPosition(position
));
438 Sci::Position
Document::VCHomePosition(Sci::Position position
) const {
439 const Sci::Line line
= SciLineFromPosition(position
);
440 const Sci::Position startPosition
= LineStart(line
);
441 const Sci::Position endLine
= LineEnd(line
);
442 Sci::Position startText
= startPosition
;
443 while (startText
< endLine
&& (cb
.CharAt(startText
) == ' ' || cb
.CharAt(startText
) == '\t'))
445 if (position
== startText
)
446 return startPosition
;
451 Sci::Position
Document::IndexLineStart(Sci::Line line
, int lineCharacterIndex
) const {
452 return cb
.IndexLineStart(line
, lineCharacterIndex
);
455 Sci::Line
Document::LineFromPositionIndex(Sci::Position pos
, int lineCharacterIndex
) const {
456 return cb
.LineFromPositionIndex(pos
, lineCharacterIndex
);
459 int SCI_METHOD
Document::SetLevel(Sci_Position line
, int level
) {
460 const int prev
= Levels()->SetLevel(static_cast<Sci::Line
>(line
), level
, LinesTotal());
462 DocModification
mh(SC_MOD_CHANGEFOLD
| SC_MOD_CHANGEMARKER
,
463 LineStart(line
), 0, 0, nullptr, static_cast<Sci::Line
>(line
));
464 mh
.foldLevelNow
= level
;
465 mh
.foldLevelPrev
= prev
;
471 int SCI_METHOD
Document::GetLevel(Sci_Position line
) const {
472 return Levels()->GetLevel(static_cast<Sci::Line
>(line
));
475 void Document::ClearLevels() {
476 Levels()->ClearLevels();
479 static bool IsSubordinate(int levelStart
, int levelTry
) noexcept
{
480 if (levelTry
& SC_FOLDLEVELWHITEFLAG
)
483 return LevelNumber(levelStart
) < LevelNumber(levelTry
);
486 Sci::Line
Document::GetLastChild(Sci::Line lineParent
, int level
, Sci::Line lastLine
) {
488 level
= LevelNumber(GetLevel(lineParent
));
489 const Sci::Line maxLine
= LinesTotal();
490 const Sci::Line lookLastLine
= (lastLine
!= -1) ? std::min(LinesTotal() - 1, lastLine
) : -1;
491 Sci::Line lineMaxSubord
= lineParent
;
492 while (lineMaxSubord
< maxLine
- 1) {
493 EnsureStyledTo(LineStart(lineMaxSubord
+ 2));
494 if (!IsSubordinate(level
, GetLevel(lineMaxSubord
+ 1)))
496 if ((lookLastLine
!= -1) && (lineMaxSubord
>= lookLastLine
) && !(GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
))
500 if (lineMaxSubord
> lineParent
) {
501 if (level
> LevelNumber(GetLevel(lineMaxSubord
+ 1))) {
502 // Have chewed up some whitespace that belongs to a parent so seek back
503 if (GetLevel(lineMaxSubord
) & SC_FOLDLEVELWHITEFLAG
) {
508 return lineMaxSubord
;
511 Sci::Line
Document::GetFoldParent(Sci::Line line
) const {
512 const int level
= LevelNumber(GetLevel(line
));
513 Sci::Line lineLook
= line
- 1;
514 while ((lineLook
> 0) && (
515 (!(GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
)) ||
516 (LevelNumber(GetLevel(lineLook
)) >= level
))
520 if ((GetLevel(lineLook
) & SC_FOLDLEVELHEADERFLAG
) &&
521 (LevelNumber(GetLevel(lineLook
)) < level
)) {
528 void Document::GetHighlightDelimiters(HighlightDelimiter
&highlightDelimiter
, Sci::Line line
, Sci::Line lastLine
) {
529 const int level
= GetLevel(line
);
530 const Sci::Line lookLastLine
= std::max(line
, lastLine
) + 1;
532 Sci::Line lookLine
= line
;
533 int lookLineLevel
= level
;
534 int lookLineLevelNum
= LevelNumber(lookLineLevel
);
535 while ((lookLine
> 0) && ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) ||
536 ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
>= LevelNumber(GetLevel(lookLine
+ 1)))))) {
537 lookLineLevel
= GetLevel(--lookLine
);
538 lookLineLevelNum
= LevelNumber(lookLineLevel
);
541 Sci::Line beginFoldBlock
= (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) ? lookLine
: GetFoldParent(lookLine
);
542 if (beginFoldBlock
== -1) {
543 highlightDelimiter
.Clear();
547 Sci::Line endFoldBlock
= GetLastChild(beginFoldBlock
, -1, lookLastLine
);
548 Sci::Line firstChangeableLineBefore
= -1;
549 if (endFoldBlock
< line
) {
550 lookLine
= beginFoldBlock
- 1;
551 lookLineLevel
= GetLevel(lookLine
);
552 lookLineLevelNum
= LevelNumber(lookLineLevel
);
553 while ((lookLine
>= 0) && (lookLineLevelNum
>= SC_FOLDLEVELBASE
)) {
554 if (lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) {
555 if (GetLastChild(lookLine
, -1, lookLastLine
) == line
) {
556 beginFoldBlock
= lookLine
;
558 firstChangeableLineBefore
= line
- 1;
561 if ((lookLine
> 0) && (lookLineLevelNum
== SC_FOLDLEVELBASE
) && (LevelNumber(GetLevel(lookLine
- 1)) > lookLineLevelNum
))
563 lookLineLevel
= GetLevel(--lookLine
);
564 lookLineLevelNum
= LevelNumber(lookLineLevel
);
567 if (firstChangeableLineBefore
== -1) {
568 for (lookLine
= line
- 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= LevelNumber(lookLineLevel
);
569 lookLine
>= beginFoldBlock
;
570 lookLineLevel
= GetLevel(--lookLine
), lookLineLevelNum
= LevelNumber(lookLineLevel
)) {
571 if ((lookLineLevel
& SC_FOLDLEVELWHITEFLAG
) || (lookLineLevelNum
> LevelNumber(level
))) {
572 firstChangeableLineBefore
= lookLine
;
577 if (firstChangeableLineBefore
== -1)
578 firstChangeableLineBefore
= beginFoldBlock
- 1;
580 Sci::Line firstChangeableLineAfter
= -1;
581 for (lookLine
= line
+ 1, lookLineLevel
= GetLevel(lookLine
), lookLineLevelNum
= LevelNumber(lookLineLevel
);
582 lookLine
<= endFoldBlock
;
583 lookLineLevel
= GetLevel(++lookLine
), lookLineLevelNum
= LevelNumber(lookLineLevel
)) {
584 if ((lookLineLevel
& SC_FOLDLEVELHEADERFLAG
) && (lookLineLevelNum
< LevelNumber(GetLevel(lookLine
+ 1)))) {
585 firstChangeableLineAfter
= lookLine
;
589 if (firstChangeableLineAfter
== -1)
590 firstChangeableLineAfter
= endFoldBlock
+ 1;
592 highlightDelimiter
.beginFoldBlock
= beginFoldBlock
;
593 highlightDelimiter
.endFoldBlock
= endFoldBlock
;
594 highlightDelimiter
.firstChangeableLineBefore
= firstChangeableLineBefore
;
595 highlightDelimiter
.firstChangeableLineAfter
= firstChangeableLineAfter
;
598 Sci::Position
Document::ClampPositionIntoDocument(Sci::Position pos
) const {
599 return Sci::clamp(pos
, static_cast<Sci::Position
>(0), static_cast<Sci::Position
>(Length()));
602 bool Document::IsCrLf(Sci::Position pos
) const {
605 if (pos
>= (Length() - 1))
607 return (cb
.CharAt(pos
) == '\r') && (cb
.CharAt(pos
+ 1) == '\n');
610 int Document::LenChar(Sci::Position pos
) {
613 } else if (IsCrLf(pos
)) {
615 } else if (SC_CP_UTF8
== dbcsCodePage
) {
616 const unsigned char leadByte
= cb
.UCharAt(pos
);
617 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
618 const Sci::Position lengthDoc
= Length();
619 if ((pos
+ widthCharBytes
) > lengthDoc
)
620 return static_cast<int>(lengthDoc
- pos
);
622 return widthCharBytes
;
623 } else if (dbcsCodePage
) {
624 return IsDBCSLeadByteNoExcept(cb
.CharAt(pos
)) ? 2 : 1;
630 bool Document::InGoodUTF8(Sci::Position pos
, Sci::Position
&start
, Sci::Position
&end
) const noexcept
{
631 Sci::Position trail
= pos
;
632 while ((trail
>0) && (pos
-trail
< UTF8MaxBytes
) && UTF8IsTrailByte(cb
.UCharAt(trail
-1)))
634 start
= (trail
> 0) ? trail
-1 : trail
;
636 const unsigned char leadByte
= cb
.UCharAt(start
);
637 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
638 if (widthCharBytes
== 1) {
641 const int trailBytes
= widthCharBytes
- 1;
642 const Sci::Position len
= pos
- start
;
643 if (len
> trailBytes
)
644 // pos too far from lead
646 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
647 for (Sci::Position b
=1; b
<widthCharBytes
&& ((start
+b
) < cb
.Length()); b
++)
648 charBytes
[b
] = cb
.CharAt(start
+b
);
649 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
650 if (utf8status
& UTF8MaskInvalid
)
652 end
= start
+ widthCharBytes
;
657 // Normalise a position so that it is not halfway through a two byte character.
658 // This can occur in two situations -
659 // When lines are terminated with \r\n pairs which should be treated as one character.
660 // When displaying DBCS text such as Japanese.
661 // If moving, move the position in the indicated direction.
662 Sci::Position
Document::MovePositionOutsideChar(Sci::Position pos
, Sci::Position moveDir
, bool checkLineEnd
) const {
663 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
664 // If out of range, just return minimum/maximum value.
670 // PLATFORM_ASSERT(pos > 0 && pos < Length());
671 if (checkLineEnd
&& IsCrLf(pos
- 1)) {
679 if (SC_CP_UTF8
== dbcsCodePage
) {
680 const unsigned char ch
= cb
.UCharAt(pos
);
681 // If ch is not a trail byte then pos is valid intercharacter position
682 if (UTF8IsTrailByte(ch
)) {
683 Sci::Position startUTF
= pos
;
684 Sci::Position endUTF
= pos
;
685 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
686 // ch is a trail byte within a UTF-8 character
692 // Else invalid UTF-8 so return position of isolated trail byte
695 // Anchor DBCS calculations at start of line because start of line can
696 // not be a DBCS trail byte.
697 const Sci::Position posStartLine
= LineStart(LineFromPosition(pos
));
698 if (pos
== posStartLine
)
701 // Step back until a non-lead-byte is found.
702 Sci::Position posCheck
= pos
;
703 while ((posCheck
> posStartLine
) && IsDBCSLeadByteNoExcept(cb
.CharAt(posCheck
-1)))
706 // Check from known start of character.
707 while (posCheck
< pos
) {
708 const int mbsize
= IsDBCSLeadByteNoExcept(cb
.CharAt(posCheck
)) ? 2 : 1;
709 if (posCheck
+ mbsize
== pos
) {
711 } else if (posCheck
+ mbsize
> pos
) {
713 return posCheck
+ mbsize
;
726 // NextPosition moves between valid positions - it can not handle a position in the middle of a
727 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
728 // A \r\n pair is treated as two characters.
729 Sci::Position
Document::NextPosition(Sci::Position pos
, int moveDir
) const noexcept
{
730 // If out of range, just return minimum/maximum value.
731 const int increment
= (moveDir
> 0) ? 1 : -1;
732 if (pos
+ increment
<= 0)
734 if (pos
+ increment
>= cb
.Length())
738 if (SC_CP_UTF8
== dbcsCodePage
) {
739 if (increment
== 1) {
740 // Simple forward movement case so can avoid some checks
741 const unsigned char leadByte
= cb
.UCharAt(pos
);
742 if (UTF8IsAscii(leadByte
)) {
743 // Single byte character or invalid
746 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
747 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
748 for (int b
=1; b
<widthCharBytes
; b
++)
749 charBytes
[b
] = cb
.CharAt(pos
+b
);
750 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
751 if (utf8status
& UTF8MaskInvalid
)
754 pos
+= utf8status
& UTF8MaskWidth
;
757 // Examine byte before position
759 const unsigned char ch
= cb
.UCharAt(pos
);
760 // If ch is not a trail byte then pos is valid intercharacter position
761 if (UTF8IsTrailByte(ch
)) {
762 // If ch is a trail byte in a valid UTF-8 character then return start of character
763 Sci::Position startUTF
= pos
;
764 Sci::Position endUTF
= pos
;
765 if (InGoodUTF8(pos
, startUTF
, endUTF
)) {
768 // Else invalid UTF-8 so return position of isolated trail byte
773 const int mbsize
= IsDBCSLeadByteNoExcept(cb
.CharAt(pos
)) ? 2 : 1;
775 if (pos
> cb
.Length())
778 // Anchor DBCS calculations at start of line because start of line can
779 // not be a DBCS trail byte.
780 const Sci::Position posStartLine
= cb
.LineStart(cb
.LineFromPosition(pos
));
781 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
782 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
783 if ((pos
- 1) <= posStartLine
) {
785 } else if (IsDBCSLeadByteNoExcept(cb
.CharAt(pos
- 1))) {
786 // Must actually be trail byte
789 // Otherwise, step back until a non-lead-byte is found.
790 Sci::Position posTemp
= pos
- 1;
791 while (posStartLine
<= --posTemp
&& IsDBCSLeadByteNoExcept(cb
.CharAt(posTemp
)))
793 // Now posTemp+1 must point to the beginning of a character,
794 // so figure out whether we went back an even or an odd
795 // number of bytes and go back 1 or 2 bytes, respectively.
796 return (pos
- 1 - ((pos
- posTemp
) & 1));
807 bool Document::NextCharacter(Sci::Position
&pos
, int moveDir
) const noexcept
{
808 // Returns true if pos changed
809 Sci::Position posNext
= NextPosition(pos
, moveDir
);
810 if (posNext
== pos
) {
818 Document::CharacterExtracted
Document::CharacterAfter(Sci::Position position
) const {
819 if (position
>= Length()) {
820 return CharacterExtracted(unicodeReplacementChar
, 0);
822 const unsigned char leadByte
= cb
.UCharAt(position
);
823 if (!dbcsCodePage
|| UTF8IsAscii(leadByte
)) {
824 // Common case: ASCII character
825 return CharacterExtracted(leadByte
, 1);
827 if (SC_CP_UTF8
== dbcsCodePage
) {
828 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
829 unsigned char charBytes
[UTF8MaxBytes
] = { leadByte
, 0, 0, 0 };
830 for (int b
= 1; b
<widthCharBytes
; b
++)
831 charBytes
[b
] = cb
.UCharAt(position
+ b
);
832 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
833 if (utf8status
& UTF8MaskInvalid
) {
834 // Treat as invalid and use up just one byte
835 return CharacterExtracted(unicodeReplacementChar
, 1);
837 return CharacterExtracted(UnicodeFromUTF8(charBytes
), utf8status
& UTF8MaskWidth
);
840 if (IsDBCSLeadByteNoExcept(leadByte
) && ((position
+ 1) < Length())) {
841 return CharacterExtracted::DBCS(leadByte
, cb
.UCharAt(position
+ 1));
843 return CharacterExtracted(leadByte
, 1);
848 Document::CharacterExtracted
Document::CharacterBefore(Sci::Position position
) const {
850 return CharacterExtracted(unicodeReplacementChar
, 0);
852 const unsigned char previousByte
= cb
.UCharAt(position
- 1);
853 if (0 == dbcsCodePage
) {
854 return CharacterExtracted(previousByte
, 1);
856 if (SC_CP_UTF8
== dbcsCodePage
) {
857 if (UTF8IsAscii(previousByte
)) {
858 return CharacterExtracted(previousByte
, 1);
861 // If previousByte is not a trail byte then its invalid
862 if (UTF8IsTrailByte(previousByte
)) {
863 // If previousByte is a trail byte in a valid UTF-8 character then find start of character
864 Sci::Position startUTF
= position
;
865 Sci::Position endUTF
= position
;
866 if (InGoodUTF8(position
, startUTF
, endUTF
)) {
867 const int widthCharBytes
= static_cast<int>(endUTF
- startUTF
);
868 unsigned char charBytes
[UTF8MaxBytes
] = { 0, 0, 0, 0 };
869 for (int b
= 0; b
<widthCharBytes
; b
++)
870 charBytes
[b
] = cb
.UCharAt(startUTF
+ b
);
871 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
872 if (utf8status
& UTF8MaskInvalid
) {
873 // Treat as invalid and use up just one byte
874 return CharacterExtracted(unicodeReplacementChar
, 1);
876 return CharacterExtracted(UnicodeFromUTF8(charBytes
), utf8status
& UTF8MaskWidth
);
879 // Else invalid UTF-8 so return position of isolated trail byte
881 return CharacterExtracted(unicodeReplacementChar
, 1);
883 // Moving backwards in DBCS is complex so use NextPosition
884 const Sci::Position posStartCharacter
= NextPosition(position
, -1);
885 return CharacterAfter(posStartCharacter
);
889 // Return -1 on out-of-bounds
890 Sci_Position SCI_METHOD
Document::GetRelativePosition(Sci_Position positionStart
, Sci_Position characterOffset
) const {
891 Sci::Position pos
= positionStart
;
893 const int increment
= (characterOffset
> 0) ? 1 : -1;
894 while (characterOffset
!= 0) {
895 const Sci::Position posNext
= NextPosition(pos
, increment
);
897 return INVALID_POSITION
;
899 characterOffset
-= increment
;
902 pos
= positionStart
+ characterOffset
;
903 if ((pos
< 0) || (pos
> Length()))
904 return INVALID_POSITION
;
909 Sci::Position
Document::GetRelativePositionUTF16(Sci::Position positionStart
, Sci::Position characterOffset
) const {
910 Sci::Position pos
= positionStart
;
912 const int increment
= (characterOffset
> 0) ? 1 : -1;
913 while (characterOffset
!= 0) {
914 const Sci::Position posNext
= NextPosition(pos
, increment
);
916 return INVALID_POSITION
;
917 if (std::abs(pos
-posNext
) > 3) // 4 byte character = 2*UTF16.
918 characterOffset
-= increment
;
920 characterOffset
-= increment
;
923 pos
= positionStart
+ characterOffset
;
924 if ((pos
< 0) || (pos
> Length()))
925 return INVALID_POSITION
;
930 int SCI_METHOD
Document::GetCharacterAndWidth(Sci_Position position
, Sci_Position
*pWidth
) const {
932 int bytesInCharacter
= 1;
933 const unsigned char leadByte
= cb
.UCharAt(position
);
935 if (SC_CP_UTF8
== dbcsCodePage
) {
936 if (UTF8IsAscii(leadByte
)) {
937 // Single byte character or invalid
938 character
= leadByte
;
940 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
941 unsigned char charBytes
[UTF8MaxBytes
] = {leadByte
,0,0,0};
942 for (int b
=1; b
<widthCharBytes
; b
++)
943 charBytes
[b
] = cb
.UCharAt(position
+b
);
944 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
945 if (utf8status
& UTF8MaskInvalid
) {
946 // Report as singleton surrogate values which are invalid Unicode
947 character
= 0xDC80 + leadByte
;
949 bytesInCharacter
= utf8status
& UTF8MaskWidth
;
950 character
= UnicodeFromUTF8(charBytes
);
954 if (IsDBCSLeadByteNoExcept(leadByte
)) {
955 bytesInCharacter
= 2;
956 character
= (leadByte
<< 8) | cb
.UCharAt(position
+1);
958 character
= leadByte
;
962 character
= leadByte
;
965 *pWidth
= bytesInCharacter
;
970 int SCI_METHOD
Document::CodePage() const {
974 bool SCI_METHOD
Document::IsDBCSLeadByte(char ch
) const {
975 // Used by lexers so must match IDocument method exactly
976 return IsDBCSLeadByteNoExcept(ch
);
979 bool Document::IsDBCSLeadByteNoExcept(char ch
) const noexcept
{
980 // Used inside core Scintilla
981 // Byte ranges found in Wikipedia articles with relevant search strings in each case
982 const unsigned char uch
= ch
;
983 switch (dbcsCodePage
) {
986 return ((uch
>= 0x81) && (uch
<= 0x9F)) ||
987 ((uch
>= 0xE0) && (uch
<= 0xFC));
988 // Lead bytes F0 to FC may be a Microsoft addition.
991 return (uch
>= 0x81) && (uch
<= 0xFE);
993 // Korean Wansung KS C-5601-1987
994 return (uch
>= 0x81) && (uch
<= 0xFE);
997 return (uch
>= 0x81) && (uch
<= 0xFE);
999 // Korean Johab KS C-5601-1992
1001 ((uch
>= 0x84) && (uch
<= 0xD3)) ||
1002 ((uch
>= 0xD8) && (uch
<= 0xDE)) ||
1003 ((uch
>= 0xE0) && (uch
<= 0xF9));
1008 bool Document::IsDBCSLeadByteInvalid(char ch
) const noexcept
{
1009 const unsigned char lead
= ch
;
1010 switch (dbcsCodePage
) {
1024 return (lead
== 0x80) || (lead
== 0xFF);
1026 // Korean Wansung KS C-5601-1987
1027 return (lead
== 0x80) || (lead
== 0xC9) || (lead
>= 0xFE);
1031 ((lead
>= 0x80) && (lead
<= 0xA0)) ||
1035 // Korean Johab KS C-5601-1992
1037 ((lead
>= 0x80) && (lead
<= 0x83)) ||
1038 ((lead
>= 0xD4) && (lead
<= 0xD8)) ||
1045 bool Document::IsDBCSTrailByteInvalid(char ch
) const noexcept
{
1046 const unsigned char trail
= ch
;
1047 switch (dbcsCodePage
) {
1061 // Korean Wansung KS C-5601-1987
1064 ((trail
>= 0x5B) && (trail
<= 0x60)) ||
1065 ((trail
>= 0x7B) && (trail
<= 0x80)) ||
1071 ((trail
>= 0x7F) && (trail
<= 0xA0)) ||
1074 // Korean Johab KS C-5601-1992
1084 int Document::DBCSDrawBytes(const char *text
, int len
) const noexcept
{
1088 if (IsDBCSLeadByteNoExcept(text
[0])) {
1089 return IsDBCSTrailByteInvalid(text
[1]) ? 1 : 2;
// True when ch is a space or a horizontal tab.
static constexpr bool IsSpaceOrTab(int ch) noexcept {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
1099 // Need to break text into segments near lengthSegment but taking into
1100 // account the encoding to not break inside a UTF-8 or DBCS character
1101 // and also trying to avoid breaking inside a pair of combining characters.
1102 // The segment length must always be long enough (more than 4 bytes)
1103 // so that there will be at least one whole character to make a segment.
1104 // For UTF-8, text must consist only of valid whole characters.
1105 // In preference order from best to worst:
1106 // 1) Break after space
1107 // 2) Break before punctuation
1108 // 3) Break after whole character
1110 int Document::SafeSegment(const char *text
, int length
, int lengthSegment
) const noexcept
{
1111 if (length
<= lengthSegment
)
1113 int lastSpaceBreak
= -1;
1114 int lastPunctuationBreak
= -1;
1115 int lastEncodingAllowedBreak
= 0;
1116 for (int j
=0; j
< lengthSegment
;) {
1117 const unsigned char ch
= text
[j
];
1119 if (IsSpaceOrTab(text
[j
- 1]) && !IsSpaceOrTab(text
[j
])) {
1123 lastPunctuationBreak
= j
;
1126 lastEncodingAllowedBreak
= j
;
1128 if (dbcsCodePage
== SC_CP_UTF8
) {
1129 j
+= UTF8BytesOfLead
[ch
];
1130 } else if (dbcsCodePage
) {
1131 j
+= IsDBCSLeadByteNoExcept(ch
) ? 2 : 1;
1136 if (lastSpaceBreak
>= 0) {
1137 return lastSpaceBreak
;
1138 } else if (lastPunctuationBreak
>= 0) {
1139 return lastPunctuationBreak
;
1141 return lastEncodingAllowedBreak
;
1144 EncodingFamily
Document::CodePageFamily() const noexcept
{
1145 if (SC_CP_UTF8
== dbcsCodePage
)
1147 else if (dbcsCodePage
)
1153 void Document::ModifiedAt(Sci::Position pos
) noexcept
{
1154 if (endStyled
> pos
)
1158 void Document::CheckReadOnly() {
1159 if (cb
.IsReadOnly() && enteredReadOnlyCount
== 0) {
1160 enteredReadOnlyCount
++;
1161 NotifyModifyAttempt();
1162 enteredReadOnlyCount
--;
1166 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1167 // SetStyleAt does not change the persistent state of a document
1169 bool Document::DeleteChars(Sci::Position pos
, Sci::Position len
) {
1174 if ((pos
+ len
) > Length())
1177 if (enteredModification
!= 0) {
1180 enteredModification
++;
1181 if (!cb
.IsReadOnly()) {
1184 SC_MOD_BEFOREDELETE
| SC_PERFORMED_USER
,
1187 const Sci::Line prevLinesTotal
= LinesTotal();
1188 const bool startSavePoint
= cb
.IsSavePoint();
1189 bool startSequence
= false;
1190 const char *text
= cb
.DeleteChars(pos
, len
, startSequence
);
1191 if (startSavePoint
&& cb
.IsCollectingUndo())
1192 NotifySavePoint(!startSavePoint
);
1193 if ((pos
< Length()) || (pos
== 0))
1199 SC_MOD_DELETETEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
1201 LinesTotal() - prevLinesTotal
, text
));
1203 enteredModification
--;
1205 return !cb
.IsReadOnly();
1209 * Insert a string with a length.
1211 Sci::Position
Document::InsertString(Sci::Position position
, const char *s
, Sci::Position insertLength
) {
1212 if (insertLength
<= 0) {
1215 CheckReadOnly(); // Application may change read only state here
1216 if (cb
.IsReadOnly()) {
1219 if (enteredModification
!= 0) {
1222 enteredModification
++;
1223 insertionSet
= false;
1228 position
, insertLength
,
1231 s
= insertion
.c_str();
1232 insertLength
= insertion
.length();
1236 SC_MOD_BEFOREINSERT
| SC_PERFORMED_USER
,
1237 position
, insertLength
,
1239 const Sci::Line prevLinesTotal
= LinesTotal();
1240 const bool startSavePoint
= cb
.IsSavePoint();
1241 bool startSequence
= false;
1242 const char *text
= cb
.InsertString(position
, s
, insertLength
, startSequence
);
1243 if (startSavePoint
&& cb
.IsCollectingUndo())
1244 NotifySavePoint(!startSavePoint
);
1245 ModifiedAt(position
);
1248 SC_MOD_INSERTTEXT
| SC_PERFORMED_USER
| (startSequence
?SC_STARTACTION
:0),
1249 position
, insertLength
,
1250 LinesTotal() - prevLinesTotal
, text
));
1251 if (insertionSet
) { // Free memory as could be large
1252 std::string().swap(insertion
);
1254 enteredModification
--;
1255 return insertLength
;
1258 void Document::ChangeInsertion(const char *s
, Sci::Position length
) {
1259 insertionSet
= true;
1260 insertion
.assign(s
, length
);
1263 int SCI_METHOD
Document::AddData(const char *data
, Sci_Position length
) {
1265 const Sci::Position position
= Length();
1266 InsertString(position
, data
, length
);
1267 } catch (std::bad_alloc
&) {
1268 return SC_STATUS_BADALLOC
;
1270 return SC_STATUS_FAILURE
;
1275 void * SCI_METHOD
Document::ConvertToDocument() {
1279 Sci::Position
Document::Undo() {
1280 Sci::Position newPos
= -1;
1282 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1283 enteredModification
++;
1284 if (!cb
.IsReadOnly()) {
1285 const bool startSavePoint
= cb
.IsSavePoint();
1286 bool multiLine
= false;
1287 const int steps
= cb
.StartUndo();
1288 //Platform::DebugPrintf("Steps=%d\n", steps);
1289 Sci::Position coalescedRemovePos
= -1;
1290 Sci::Position coalescedRemoveLen
= 0;
1291 Sci::Position prevRemoveActionPos
= -1;
1292 Sci::Position prevRemoveActionLen
= 0;
1293 for (int step
= 0; step
< steps
; step
++) {
1294 const Sci::Line prevLinesTotal
= LinesTotal();
1295 const Action
&action
= cb
.GetUndoStep();
1296 if (action
.at
== removeAction
) {
1297 NotifyModified(DocModification(
1298 SC_MOD_BEFOREINSERT
| SC_PERFORMED_UNDO
, action
));
1299 } else if (action
.at
== containerAction
) {
1300 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_UNDO
);
1301 dm
.token
= action
.position
;
1303 if (!action
.mayCoalesce
) {
1304 coalescedRemovePos
= -1;
1305 coalescedRemoveLen
= 0;
1306 prevRemoveActionPos
= -1;
1307 prevRemoveActionLen
= 0;
1310 NotifyModified(DocModification(
1311 SC_MOD_BEFOREDELETE
| SC_PERFORMED_UNDO
, action
));
1313 cb
.PerformUndoStep();
1314 if (action
.at
!= containerAction
) {
1315 ModifiedAt(action
.position
);
1316 newPos
= action
.position
;
1319 int modFlags
= SC_PERFORMED_UNDO
;
1320 // With undo, an insertion action becomes a deletion notification
1321 if (action
.at
== removeAction
) {
1322 newPos
+= action
.lenData
;
1323 modFlags
|= SC_MOD_INSERTTEXT
;
1324 if ((coalescedRemoveLen
> 0) &&
1325 (action
.position
== prevRemoveActionPos
|| action
.position
== (prevRemoveActionPos
+ prevRemoveActionLen
))) {
1326 coalescedRemoveLen
+= action
.lenData
;
1327 newPos
= coalescedRemovePos
+ coalescedRemoveLen
;
1329 coalescedRemovePos
= action
.position
;
1330 coalescedRemoveLen
= action
.lenData
;
1332 prevRemoveActionPos
= action
.position
;
1333 prevRemoveActionLen
= action
.lenData
;
1334 } else if (action
.at
== insertAction
) {
1335 modFlags
|= SC_MOD_DELETETEXT
;
1336 coalescedRemovePos
= -1;
1337 coalescedRemoveLen
= 0;
1338 prevRemoveActionPos
= -1;
1339 prevRemoveActionLen
= 0;
1342 modFlags
|= SC_MULTISTEPUNDOREDO
;
1343 const Sci::Line linesAdded
= LinesTotal() - prevLinesTotal
;
1344 if (linesAdded
!= 0)
1346 if (step
== steps
- 1) {
1347 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1349 modFlags
|= SC_MULTILINEUNDOREDO
;
1351 NotifyModified(DocModification(modFlags
, action
.position
, action
.lenData
,
1352 linesAdded
, action
.data
.get()));
1355 const bool endSavePoint
= cb
.IsSavePoint();
1356 if (startSavePoint
!= endSavePoint
)
1357 NotifySavePoint(endSavePoint
);
1359 enteredModification
--;
1364 Sci::Position
Document::Redo() {
1365 Sci::Position newPos
= -1;
1367 if ((enteredModification
== 0) && (cb
.IsCollectingUndo())) {
1368 enteredModification
++;
1369 if (!cb
.IsReadOnly()) {
1370 const bool startSavePoint
= cb
.IsSavePoint();
1371 bool multiLine
= false;
1372 const int steps
= cb
.StartRedo();
1373 for (int step
= 0; step
< steps
; step
++) {
1374 const Sci::Line prevLinesTotal
= LinesTotal();
1375 const Action
&action
= cb
.GetRedoStep();
1376 if (action
.at
== insertAction
) {
1377 NotifyModified(DocModification(
1378 SC_MOD_BEFOREINSERT
| SC_PERFORMED_REDO
, action
));
1379 } else if (action
.at
== containerAction
) {
1380 DocModification
dm(SC_MOD_CONTAINER
| SC_PERFORMED_REDO
);
1381 dm
.token
= action
.position
;
1384 NotifyModified(DocModification(
1385 SC_MOD_BEFOREDELETE
| SC_PERFORMED_REDO
, action
));
1387 cb
.PerformRedoStep();
1388 if (action
.at
!= containerAction
) {
1389 ModifiedAt(action
.position
);
1390 newPos
= action
.position
;
1393 int modFlags
= SC_PERFORMED_REDO
;
1394 if (action
.at
== insertAction
) {
1395 newPos
+= action
.lenData
;
1396 modFlags
|= SC_MOD_INSERTTEXT
;
1397 } else if (action
.at
== removeAction
) {
1398 modFlags
|= SC_MOD_DELETETEXT
;
1401 modFlags
|= SC_MULTISTEPUNDOREDO
;
1402 const Sci::Line linesAdded
= LinesTotal() - prevLinesTotal
;
1403 if (linesAdded
!= 0)
1405 if (step
== steps
- 1) {
1406 modFlags
|= SC_LASTSTEPINUNDOREDO
;
1408 modFlags
|= SC_MULTILINEUNDOREDO
;
1411 DocModification(modFlags
, action
.position
, action
.lenData
,
1412 linesAdded
, action
.data
.get()));
1415 const bool endSavePoint
= cb
.IsSavePoint();
1416 if (startSavePoint
!= endSavePoint
)
1417 NotifySavePoint(endSavePoint
);
1419 enteredModification
--;
1424 void Document::DelChar(Sci::Position pos
) {
1425 DeleteChars(pos
, LenChar(pos
));
1428 void Document::DelCharBack(Sci::Position pos
) {
1431 } else if (IsCrLf(pos
- 2)) {
1432 DeleteChars(pos
- 2, 2);
1433 } else if (dbcsCodePage
) {
1434 const Sci::Position startChar
= NextPosition(pos
, -1);
1435 DeleteChars(startChar
, pos
- startChar
);
1437 DeleteChars(pos
- 1, 1);
1441 static constexpr Sci::Position
NextTab(Sci::Position pos
, Sci::Position tabSize
) noexcept
{
1442 return ((pos
/ tabSize
) + 1) * tabSize
;
1445 static std::string
CreateIndentation(Sci::Position indent
, int tabSize
, bool insertSpaces
) {
1446 std::string indentation
;
1447 if (!insertSpaces
) {
1448 while (indent
>= tabSize
) {
1449 indentation
+= '\t';
1453 while (indent
> 0) {
1460 int SCI_METHOD
Document::GetLineIndentation(Sci_Position line
) {
1462 if ((line
>= 0) && (line
< LinesTotal())) {
1463 const Sci::Position lineStart
= LineStart(line
);
1464 const Sci::Position length
= Length();
1465 for (Sci::Position i
= lineStart
; i
< length
; i
++) {
1466 const char ch
= cb
.CharAt(i
);
1469 else if (ch
== '\t')
1470 indent
= static_cast<int>(NextTab(indent
, tabInChars
));
1478 Sci::Position
Document::SetLineIndentation(Sci::Line line
, Sci::Position indent
) {
1479 const int indentOfLine
= GetLineIndentation(line
);
1482 if (indent
!= indentOfLine
) {
1483 std::string linebuf
= CreateIndentation(indent
, tabInChars
, !useTabs
);
1484 const Sci::Position thisLineStart
= LineStart(line
);
1485 const Sci::Position indentPos
= GetLineIndentPosition(line
);
1487 DeleteChars(thisLineStart
, indentPos
- thisLineStart
);
1488 return thisLineStart
+ InsertString(thisLineStart
, linebuf
.c_str(),
1491 return GetLineIndentPosition(line
);
1495 Sci::Position
Document::GetLineIndentPosition(Sci::Line line
) const {
1498 Sci::Position pos
= LineStart(line
);
1499 const Sci::Position length
= Length();
1500 while ((pos
< length
) && IsSpaceOrTab(cb
.CharAt(pos
))) {
1506 Sci::Position
Document::GetColumn(Sci::Position pos
) {
1507 Sci::Position column
= 0;
1508 const Sci::Line line
= SciLineFromPosition(pos
);
1509 if ((line
>= 0) && (line
< LinesTotal())) {
1510 for (Sci::Position i
= LineStart(line
); i
< pos
;) {
1511 const char ch
= cb
.CharAt(i
);
1513 column
= NextTab(column
, tabInChars
);
1515 } else if (ch
== '\r') {
1517 } else if (ch
== '\n') {
1519 } else if (i
>= Length()) {
1523 i
= NextPosition(i
, 1);
1530 Sci::Position
Document::CountCharacters(Sci::Position startPos
, Sci::Position endPos
) const {
1531 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1532 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1533 Sci::Position count
= 0;
1534 Sci::Position i
= startPos
;
1535 while (i
< endPos
) {
1537 i
= NextPosition(i
, 1);
1542 Sci::Position
Document::CountUTF16(Sci::Position startPos
, Sci::Position endPos
) const {
1543 startPos
= MovePositionOutsideChar(startPos
, 1, false);
1544 endPos
= MovePositionOutsideChar(endPos
, -1, false);
1545 Sci::Position count
= 0;
1546 Sci::Position i
= startPos
;
1547 while (i
< endPos
) {
1549 const Sci::Position next
= NextPosition(i
, 1);
1557 Sci::Position
Document::FindColumn(Sci::Line line
, Sci::Position column
) {
1558 Sci::Position position
= LineStart(line
);
1559 if ((line
>= 0) && (line
< LinesTotal())) {
1560 Sci::Position columnCurrent
= 0;
1561 while ((columnCurrent
< column
) && (position
< Length())) {
1562 const char ch
= cb
.CharAt(position
);
1564 columnCurrent
= NextTab(columnCurrent
, tabInChars
);
1565 if (columnCurrent
> column
)
1568 } else if (ch
== '\r') {
1570 } else if (ch
== '\n') {
1574 position
= NextPosition(position
, 1);
1581 void Document::Indent(bool forwards
, Sci::Line lineBottom
, Sci::Line lineTop
) {
1582 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1583 for (Sci::Line line
= lineBottom
; line
>= lineTop
; line
--) {
1584 const Sci::Position indentOfLine
= GetLineIndentation(line
);
1586 if (LineStart(line
) < LineEnd(line
)) {
1587 SetLineIndentation(line
, indentOfLine
+ IndentSize());
1590 SetLineIndentation(line
, indentOfLine
- IndentSize());
1595 // Convert line endings for a piece of text to a particular mode.
1596 // Stop at len or when a NUL is found.
1597 std::string
Document::TransformLineEnds(const char *s
, size_t len
, int eolModeWanted
) {
1599 for (size_t i
= 0; (i
< len
) && (s
[i
]); i
++) {
1600 if (s
[i
] == '\n' || s
[i
] == '\r') {
1601 if (eolModeWanted
== SC_EOL_CR
) {
1602 dest
.push_back('\r');
1603 } else if (eolModeWanted
== SC_EOL_LF
) {
1604 dest
.push_back('\n');
1605 } else { // eolModeWanted == SC_EOL_CRLF
1606 dest
.push_back('\r');
1607 dest
.push_back('\n');
1609 if ((s
[i
] == '\r') && (i
+1 < len
) && (s
[i
+1] == '\n')) {
1613 dest
.push_back(s
[i
]);
1619 void Document::ConvertLineEnds(int eolModeSet
) {
1622 for (Sci::Position pos
= 0; pos
< Length(); pos
++) {
1623 if (cb
.CharAt(pos
) == '\r') {
1624 if (cb
.CharAt(pos
+ 1) == '\n') {
1626 if (eolModeSet
== SC_EOL_CR
) {
1627 DeleteChars(pos
+ 1, 1); // Delete the LF
1628 } else if (eolModeSet
== SC_EOL_LF
) {
1629 DeleteChars(pos
, 1); // Delete the CR
1635 if (eolModeSet
== SC_EOL_CRLF
) {
1636 pos
+= InsertString(pos
+ 1, "\n", 1); // Insert LF
1637 } else if (eolModeSet
== SC_EOL_LF
) {
1638 pos
+= InsertString(pos
, "\n", 1); // Insert LF
1639 DeleteChars(pos
, 1); // Delete CR
1643 } else if (cb
.CharAt(pos
) == '\n') {
1645 if (eolModeSet
== SC_EOL_CRLF
) {
1646 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1647 } else if (eolModeSet
== SC_EOL_CR
) {
1648 pos
+= InsertString(pos
, "\r", 1); // Insert CR
1649 DeleteChars(pos
, 1); // Delete LF
1657 int Document::Options() const {
1658 return (IsLarge() ? SC_DOCUMENTOPTION_TEXT_LARGE
: 0) |
1659 (cb
.HasStyles() ? 0 : SC_DOCUMENTOPTION_STYLES_NONE
);
1662 bool Document::IsWhiteLine(Sci::Line line
) const {
1663 Sci::Position currentChar
= LineStart(line
);
1664 const Sci::Position endLine
= LineEnd(line
);
1665 while (currentChar
< endLine
) {
1666 if (!IsSpaceOrTab(cb
.CharAt(currentChar
))) {
1674 Sci::Position
Document::ParaUp(Sci::Position pos
) const {
1675 Sci::Line line
= SciLineFromPosition(pos
);
1677 while (line
>= 0 && IsWhiteLine(line
)) { // skip empty lines
1680 while (line
>= 0 && !IsWhiteLine(line
)) { // skip non-empty lines
1684 return LineStart(line
);
1687 Sci::Position
Document::ParaDown(Sci::Position pos
) const {
1688 Sci::Line line
= SciLineFromPosition(pos
);
1689 while (line
< LinesTotal() && !IsWhiteLine(line
)) { // skip non-empty lines
1692 while (line
< LinesTotal() && IsWhiteLine(line
)) { // skip empty lines
1695 if (line
< LinesTotal())
1696 return LineStart(line
);
1697 else // end of a document
1698 return LineEnd(line
-1);
1701 bool Document::IsASCIIWordByte(unsigned char ch
) const {
1703 return charClass
.GetClass(ch
) == CharClassify::ccWord
;
1709 CharClassify::cc
Document::WordCharacterClass(unsigned int ch
) const {
1710 if (dbcsCodePage
&& (!UTF8IsAscii(ch
))) {
1711 if (SC_CP_UTF8
== dbcsCodePage
) {
1712 // Use hard coded Unicode class
1713 const CharacterCategory cc
= charMap
.CategoryFor(ch
);
1716 // Separator, Line/Paragraph
1719 return CharClassify::ccNewLine
;
1729 return CharClassify::ccSpace
;
1741 // Mark - includes combining diacritics
1745 return CharClassify::ccWord
;
1760 return CharClassify::ccPunctuation
;
1765 return CharClassify::ccWord
;
1768 return charClass
.GetClass(static_cast<unsigned char>(ch
));
 * Used by commands that want to select whole words.
1773 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1775 Sci::Position
Document::ExtendWordSelect(Sci::Position pos
, int delta
, bool onlyWordCharacters
) const {
1776 CharClassify::cc ccStart
= CharClassify::ccWord
;
1778 if (!onlyWordCharacters
) {
1779 const CharacterExtracted ce
= CharacterBefore(pos
);
1780 ccStart
= WordCharacterClass(ce
.character
);
1783 const CharacterExtracted ce
= CharacterBefore(pos
);
1784 if (WordCharacterClass(ce
.character
) != ccStart
)
1786 pos
-= ce
.widthBytes
;
1789 if (!onlyWordCharacters
&& pos
< Length()) {
1790 const CharacterExtracted ce
= CharacterAfter(pos
);
1791 ccStart
= WordCharacterClass(ce
.character
);
1793 while (pos
< Length()) {
1794 const CharacterExtracted ce
= CharacterAfter(pos
);
1795 if (WordCharacterClass(ce
.character
) != ccStart
)
1797 pos
+= ce
.widthBytes
;
1800 return MovePositionOutsideChar(pos
, delta
, true);
1804 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1806 * This is looking for a transition between character classes although there is also some
1807 * additional movement to transit white space.
1808 * Used by cursor movement by word commands.
1810 Sci::Position
Document::NextWordStart(Sci::Position pos
, int delta
) const {
1813 const CharacterExtracted ce
= CharacterBefore(pos
);
1814 if (WordCharacterClass(ce
.character
) != CharClassify::ccSpace
)
1816 pos
-= ce
.widthBytes
;
1819 CharacterExtracted ce
= CharacterBefore(pos
);
1820 const CharClassify::cc ccStart
= WordCharacterClass(ce
.character
);
1822 ce
= CharacterBefore(pos
);
1823 if (WordCharacterClass(ce
.character
) != ccStart
)
1825 pos
-= ce
.widthBytes
;
1829 CharacterExtracted ce
= CharacterAfter(pos
);
1830 const CharClassify::cc ccStart
= WordCharacterClass(ce
.character
);
1831 while (pos
< Length()) {
1832 ce
= CharacterAfter(pos
);
1833 if (WordCharacterClass(ce
.character
) != ccStart
)
1835 pos
+= ce
.widthBytes
;
1837 while (pos
< Length()) {
1838 ce
= CharacterAfter(pos
);
1839 if (WordCharacterClass(ce
.character
) != CharClassify::ccSpace
)
1841 pos
+= ce
.widthBytes
;
1848 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1850 * This is looking for a transition between character classes although there is also some
1851 * additional movement to transit white space.
1852 * Used by cursor movement by word commands.
1854 Sci::Position
Document::NextWordEnd(Sci::Position pos
, int delta
) const {
1857 CharacterExtracted ce
= CharacterBefore(pos
);
1858 const CharClassify::cc ccStart
= WordCharacterClass(ce
.character
);
1859 if (ccStart
!= CharClassify::ccSpace
) {
1861 ce
= CharacterBefore(pos
);
1862 if (WordCharacterClass(ce
.character
) != ccStart
)
1864 pos
-= ce
.widthBytes
;
1868 ce
= CharacterBefore(pos
);
1869 if (WordCharacterClass(ce
.character
) != CharClassify::ccSpace
)
1871 pos
-= ce
.widthBytes
;
1875 while (pos
< Length()) {
1876 const CharacterExtracted ce
= CharacterAfter(pos
);
1877 if (WordCharacterClass(ce
.character
) != CharClassify::ccSpace
)
1879 pos
+= ce
.widthBytes
;
1881 if (pos
< Length()) {
1882 CharacterExtracted ce
= CharacterAfter(pos
);
1883 const CharClassify::cc ccStart
= WordCharacterClass(ce
.character
);
1884 while (pos
< Length()) {
1885 ce
= CharacterAfter(pos
);
1886 if (WordCharacterClass(ce
.character
) != ccStart
)
1888 pos
+= ce
.widthBytes
;
1896 * Check that the character at the given position is a word or punctuation character and that
1897 * the previous character is of a different character class.
1899 bool Document::IsWordStartAt(Sci::Position pos
) const {
1900 if (pos
>= Length())
1903 const CharacterExtracted cePos
= CharacterAfter(pos
);
1904 const CharClassify::cc ccPos
= WordCharacterClass(cePos
.character
);
1905 const CharacterExtracted cePrev
= CharacterBefore(pos
);
1906 const CharClassify::cc ccPrev
= WordCharacterClass(cePrev
.character
);
1907 return (ccPos
== CharClassify::ccWord
|| ccPos
== CharClassify::ccPunctuation
) &&
1914 * Check that the character at the given position is a word or punctuation character and that
1915 * the next character is of a different character class.
1917 bool Document::IsWordEndAt(Sci::Position pos
) const {
1920 if (pos
< Length()) {
1921 const CharacterExtracted cePos
= CharacterAfter(pos
);
1922 const CharClassify::cc ccPos
= WordCharacterClass(cePos
.character
);
1923 const CharacterExtracted cePrev
= CharacterBefore(pos
);
1924 const CharClassify::cc ccPrev
= WordCharacterClass(cePrev
.character
);
1925 return (ccPrev
== CharClassify::ccWord
|| ccPrev
== CharClassify::ccPunctuation
) &&
 * Check that the given range has transitions between character classes at both
1933 * ends and where the characters on the inside are word or punctuation characters.
1935 bool Document::IsWordAt(Sci::Position start
, Sci::Position end
) const {
1936 return (start
< end
) && IsWordStartAt(start
) && IsWordEndAt(end
);
1939 bool Document::MatchesWordOptions(bool word
, bool wordStart
, Sci::Position pos
, Sci::Position length
) const {
1940 return (!word
&& !wordStart
) ||
1941 (word
&& IsWordAt(pos
, pos
+ length
)) ||
1942 (wordStart
&& IsWordStartAt(pos
));
1945 bool Document::HasCaseFolder() const noexcept
{
1946 return pcf
!= nullptr;
1949 void Document::SetCaseFolder(CaseFolder
*pcf_
) {
1953 Document::CharacterExtracted
Document::ExtractCharacter(Sci::Position position
) const noexcept
{
1954 const unsigned char leadByte
= cb
.UCharAt(position
);
1955 if (UTF8IsAscii(leadByte
)) {
1956 // Common case: ASCII character
1957 return CharacterExtracted(leadByte
, 1);
1959 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
1960 unsigned char charBytes
[UTF8MaxBytes
] = { leadByte
, 0, 0, 0 };
1961 for (int b
=1; b
<widthCharBytes
; b
++)
1962 charBytes
[b
] = cb
.UCharAt(position
+ b
);
1963 const int utf8status
= UTF8Classify(charBytes
, widthCharBytes
);
1964 if (utf8status
& UTF8MaskInvalid
) {
1965 // Treat as invalid and use up just one byte
1966 return CharacterExtracted(unicodeReplacementChar
, 1);
1968 return CharacterExtracted(UnicodeFromUTF8(charBytes
), utf8status
& UTF8MaskWidth
);
1973 * Find text in document, supporting both forward and backward
1974 * searches (just pass minPos > maxPos to do a backward search)
1975 * Has not been tested with backwards DBCS searches yet.
1977 Sci::Position
Document::FindText(Sci::Position minPos
, Sci::Position maxPos
, const char *search
,
1978 int flags
, Sci::Position
*length
) {
1981 const bool caseSensitive
= (flags
& SCFIND_MATCHCASE
) != 0;
1982 const bool word
= (flags
& SCFIND_WHOLEWORD
) != 0;
1983 const bool wordStart
= (flags
& SCFIND_WORDSTART
) != 0;
1984 const bool regExp
= (flags
& SCFIND_REGEXP
) != 0;
1987 regex
= std::unique_ptr
<RegexSearchBase
>(CreateRegexSearch(&charClass
));
1988 return regex
->FindText(this, minPos
, maxPos
, search
, caseSensitive
, word
, wordStart
, flags
, length
);
1991 const bool forward
= minPos
<= maxPos
;
1992 const int increment
= forward
? 1 : -1;
1994 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1995 const Sci::Position startPos
= MovePositionOutsideChar(minPos
, increment
, false);
1996 const Sci::Position endPos
= MovePositionOutsideChar(maxPos
, increment
, false);
1998 // Compute actual search ranges needed
1999 const Sci::Position lengthFind
= *length
;
2001 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2002 const Sci::Position limitPos
= std::max(startPos
, endPos
);
2003 Sci::Position pos
= startPos
;
2005 // Back all of a character
2006 pos
= NextPosition(pos
, increment
);
2008 if (caseSensitive
) {
2009 const Sci::Position endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
2010 const char charStartSearch
= search
[0];
2011 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
2012 if (CharAt(pos
) == charStartSearch
) {
2013 bool found
= (pos
+ lengthFind
) <= limitPos
;
2014 for (int indexSearch
= 1; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
2015 found
= CharAt(pos
+ indexSearch
) == search
[indexSearch
];
2017 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
2021 if (!NextCharacter(pos
, increment
))
2024 } else if (SC_CP_UTF8
== dbcsCodePage
) {
2025 const size_t maxFoldingExpansion
= 4;
2026 std::vector
<char> searchThing((lengthFind
+1) * UTF8MaxBytes
* maxFoldingExpansion
+ 1);
2027 const size_t lenSearch
=
2028 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
2029 char bytes
[UTF8MaxBytes
+ 1] = "";
2030 char folded
[UTF8MaxBytes
* maxFoldingExpansion
+ 1] = "";
2031 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
2032 int widthFirstCharacter
= 0;
2033 Sci::Position posIndexDocument
= pos
;
2034 size_t indexSearch
= 0;
2035 bool characterMatches
= true;
2037 const unsigned char leadByte
= cb
.UCharAt(posIndexDocument
);
2038 bytes
[0] = leadByte
;
2040 if (!UTF8IsAscii(leadByte
)) {
2041 const int widthCharBytes
= UTF8BytesOfLead
[leadByte
];
2042 for (int b
=1; b
<widthCharBytes
; b
++) {
2043 bytes
[b
] = cb
.CharAt(posIndexDocument
+b
);
2045 widthChar
= UTF8Classify(reinterpret_cast<const unsigned char *>(bytes
), widthCharBytes
) & UTF8MaskWidth
;
2047 if (!widthFirstCharacter
)
2048 widthFirstCharacter
= widthChar
;
2049 if ((posIndexDocument
+ widthChar
) > limitPos
)
2051 const size_t lenFlat
= pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
);
2052 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2053 assert((indexSearch
+ lenFlat
) <= searchThing
.size());
2054 // Does folded match the buffer
2055 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
2056 if (!characterMatches
)
2058 posIndexDocument
+= widthChar
;
2059 indexSearch
+= lenFlat
;
2060 if (indexSearch
>= lenSearch
)
2063 if (characterMatches
&& (indexSearch
== lenSearch
)) {
2064 if (MatchesWordOptions(word
, wordStart
, pos
, posIndexDocument
- pos
)) {
2065 *length
= posIndexDocument
- pos
;
2070 pos
+= widthFirstCharacter
;
2072 if (!NextCharacter(pos
, increment
))
2076 } else if (dbcsCodePage
) {
2077 const size_t maxBytesCharacter
= 2;
2078 const size_t maxFoldingExpansion
= 4;
2079 std::vector
<char> searchThing((lengthFind
+1) * maxBytesCharacter
* maxFoldingExpansion
+ 1);
2080 const size_t lenSearch
= pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
2081 while (forward
? (pos
< endPos
) : (pos
>= endPos
)) {
2082 Sci::Position indexDocument
= 0;
2083 size_t indexSearch
= 0;
2084 bool characterMatches
= true;
2085 while (characterMatches
&&
2086 ((pos
+ indexDocument
) < limitPos
) &&
2087 (indexSearch
< lenSearch
)) {
2088 char bytes
[maxBytesCharacter
+ 1];
2089 bytes
[0] = cb
.CharAt(pos
+ indexDocument
);
2090 const Sci::Position widthChar
= IsDBCSLeadByteNoExcept(bytes
[0]) ? 2 : 1;
2092 bytes
[1] = cb
.CharAt(pos
+ indexDocument
+ 1);
2093 if ((pos
+ indexDocument
+ widthChar
) > limitPos
)
2095 char folded
[maxBytesCharacter
* maxFoldingExpansion
+ 1];
2096 const size_t lenFlat
= pcf
->Fold(folded
, sizeof(folded
), bytes
, widthChar
);
2097 // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2098 assert((indexSearch
+ lenFlat
) <= searchThing
.size());
2099 // Does folded match the buffer
2100 characterMatches
= 0 == memcmp(folded
, &searchThing
[0] + indexSearch
, lenFlat
);
2101 indexDocument
+= widthChar
;
2102 indexSearch
+= lenFlat
;
2104 if (characterMatches
&& (indexSearch
== lenSearch
)) {
2105 if (MatchesWordOptions(word
, wordStart
, pos
, indexDocument
)) {
2106 *length
= indexDocument
;
2110 if (!NextCharacter(pos
, increment
))
2114 const Sci::Position endSearch
= (startPos
<= endPos
) ? endPos
- lengthFind
+ 1 : endPos
;
2115 std::vector
<char> searchThing(lengthFind
+ 1);
2116 pcf
->Fold(&searchThing
[0], searchThing
.size(), search
, lengthFind
);
2117 while (forward
? (pos
< endSearch
) : (pos
>= endSearch
)) {
2118 bool found
= (pos
+ lengthFind
) <= limitPos
;
2119 for (int indexSearch
= 0; (indexSearch
< lengthFind
) && found
; indexSearch
++) {
2120 const char ch
= CharAt(pos
+ indexSearch
);
2122 pcf
->Fold(folded
, sizeof(folded
), &ch
, 1);
2123 found
= folded
[0] == searchThing
[indexSearch
];
2125 if (found
&& MatchesWordOptions(word
, wordStart
, pos
, lengthFind
)) {
2128 if (!NextCharacter(pos
, increment
))
2133 //Platform::DebugPrintf("Not found\n");
2137 const char *Document::SubstituteByPosition(const char *text
, Sci::Position
*length
) {
2139 return regex
->SubstituteByPosition(this, text
, length
);
2144 int Document::LineCharacterIndex() const {
2145 return cb
.LineCharacterIndex();
2148 void Document::AllocateLineCharacterIndex(int lineCharacterIndex
) {
2149 return cb
.AllocateLineCharacterIndex(lineCharacterIndex
);
2152 void Document::ReleaseLineCharacterIndex(int lineCharacterIndex
) {
2153 return cb
.ReleaseLineCharacterIndex(lineCharacterIndex
);
2156 Sci::Line
Document::LinesTotal() const noexcept
{
2160 void Document::SetDefaultCharClasses(bool includeWordClass
) {
2161 charClass
.SetDefaultCharClasses(includeWordClass
);
2164 void Document::SetCharClasses(const unsigned char *chars
, CharClassify::cc newCharClass
) {
2165 charClass
.SetCharClasses(chars
, newCharClass
);
2168 int Document::GetCharsOfClass(CharClassify::cc characterClass
, unsigned char *buffer
) const {
2169 return charClass
.GetCharsOfClass(characterClass
, buffer
);
2172 void Document::SetCharacterCategoryOptimization(int countCharacters
) {
2173 charMap
.Optimize(countCharacters
);
2176 int Document::CharacterCategoryOptimization() const noexcept
{
2177 return charMap
.Size();
2180 void SCI_METHOD
Document::StartStyling(Sci_Position position
, char) {
2181 endStyled
= position
;
2184 bool SCI_METHOD
Document::SetStyleFor(Sci_Position length
, char style
) {
2185 if (enteredStyling
!= 0) {
2189 const Sci::Position prevEndStyled
= endStyled
;
2190 if (cb
.SetStyleFor(endStyled
, length
, style
)) {
2191 const DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
2192 prevEndStyled
, length
);
2195 endStyled
+= length
;
2201 bool SCI_METHOD
Document::SetStyles(Sci_Position length
, const char *styles
) {
2202 if (enteredStyling
!= 0) {
2206 bool didChange
= false;
2207 Sci::Position startMod
= 0;
2208 Sci::Position endMod
= 0;
2209 for (int iPos
= 0; iPos
< length
; iPos
++, endStyled
++) {
2210 PLATFORM_ASSERT(endStyled
< Length());
2211 if (cb
.SetStyleAt(endStyled
, styles
[iPos
])) {
2213 startMod
= endStyled
;
2220 const DocModification
mh(SC_MOD_CHANGESTYLE
| SC_PERFORMED_USER
,
2221 startMod
, endMod
- startMod
+ 1);
2229 void Document::EnsureStyledTo(Sci::Position pos
) {
2230 if ((enteredStyling
== 0) && (pos
> GetEndStyled())) {
2231 IncrementStyleClock();
2232 if (pli
&& !pli
->UseContainerLexing()) {
2233 const Sci::Line lineEndStyled
= SciLineFromPosition(GetEndStyled());
2234 const Sci::Position endStyledTo
= LineStart(lineEndStyled
);
2235 pli
->Colourise(endStyledTo
, pos
);
2237 // Ask the watchers to style, and stop as soon as one responds.
2238 for (std::vector
<WatcherWithUserData
>::iterator it
= watchers
.begin();
2239 (pos
> GetEndStyled()) && (it
!= watchers
.end()); ++it
) {
2240 it
->watcher
->NotifyStyleNeeded(this, it
->userData
, pos
);
2246 void Document::StyleToAdjustingLineDuration(Sci::Position pos
) {
2247 const Sci::Line lineFirst
= SciLineFromPosition(GetEndStyled());
2248 ElapsedPeriod epStyling
;
2249 EnsureStyledTo(pos
);
2250 const Sci::Line lineLast
= SciLineFromPosition(GetEndStyled());
2251 durationStyleOneLine
.AddSample(lineLast
- lineFirst
, epStyling
.Duration());
2254 void Document::LexerChanged() {
2255 // Tell the watchers the lexer has changed.
2256 for (const WatcherWithUserData
&watcher
: watchers
) {
2257 watcher
.watcher
->NotifyLexerChanged(this, watcher
.userData
);
2261 LexInterface
*Document::GetLexInterface() const {
2265 void Document::SetLexInterface(LexInterface
*pLexInterface
) {
2266 pli
.reset(pLexInterface
);
2269 int SCI_METHOD
Document::SetLineState(Sci_Position line
, int state
) {
2270 const int statePrevious
= States()->SetLineState(static_cast<Sci::Line
>(line
), state
);
2271 if (state
!= statePrevious
) {
2272 const DocModification
mh(SC_MOD_CHANGELINESTATE
, LineStart(line
), 0, 0, nullptr,
2273 static_cast<Sci::Line
>(line
));
2276 return statePrevious
;
2279 int SCI_METHOD
Document::GetLineState(Sci_Position line
) const {
2280 return States()->GetLineState(static_cast<Sci::Line
>(line
));
2283 Sci::Line
Document::GetMaxLineState() const {
2284 return States()->GetMaxLineState();
2287 void SCI_METHOD
Document::ChangeLexerState(Sci_Position start
, Sci_Position end
) {
2288 const DocModification
mh(SC_MOD_LEXERSTATE
, start
,
2289 end
-start
, 0, 0, 0);
2293 StyledText
Document::MarginStyledText(Sci::Line line
) const {
2294 const LineAnnotation
*pla
= Margins();
2295 return StyledText(pla
->Length(line
), pla
->Text(line
),
2296 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
2299 void Document::MarginSetText(Sci::Line line
, const char *text
) {
2300 Margins()->SetText(line
, text
);
2301 const DocModification
mh(SC_MOD_CHANGEMARGIN
, LineStart(line
),
2306 void Document::MarginSetStyle(Sci::Line line
, int style
) {
2307 Margins()->SetStyle(line
, style
);
2308 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
),
2312 void Document::MarginSetStyles(Sci::Line line
, const unsigned char *styles
) {
2313 Margins()->SetStyles(line
, styles
);
2314 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN
, LineStart(line
),
2318 void Document::MarginClearAll() {
2319 const Sci::Line maxEditorLine
= LinesTotal();
2320 for (Sci::Line l
=0; l
<maxEditorLine
; l
++)
2321 MarginSetText(l
, nullptr);
2322 // Free remaining data
2323 Margins()->ClearAll();
2326 StyledText
Document::AnnotationStyledText(Sci::Line line
) const {
2327 const LineAnnotation
*pla
= Annotations();
2328 return StyledText(pla
->Length(line
), pla
->Text(line
),
2329 pla
->MultipleStyles(line
), pla
->Style(line
), pla
->Styles(line
));
// Set the annotation text of `line`, ignoring lines outside [0, LinesTotal()).
// AnnotationLines(line) is sampled before and after the change so that the
// SC_MOD_CHANGEANNOTATION DocModification can carry the difference in
// annotationLinesAdded.
// NOTE(review): linesBefore is Sci::Line while linesAfter is int, although
// both hold the result of AnnotationLines() (declared to return int).
2332 void Document::AnnotationSetText(Sci::Line line
, const char *text
) {
2333 if (line
>= 0 && line
< LinesTotal()) {
2334 const Sci::Line linesBefore
= AnnotationLines(line
);
2335 Annotations()->SetText(line
, text
);
2336 const int linesAfter
= AnnotationLines(line
);
2337 DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
),
// Number of annotation display lines gained (or lost, when negative).
2339 mh
.annotationLinesAdded
= linesAfter
- linesBefore
;
2344 void Document::AnnotationSetStyle(Sci::Line line
, int style
) {
2345 Annotations()->SetStyle(line
, style
);
2346 const DocModification
mh(SC_MOD_CHANGEANNOTATION
, LineStart(line
),
2351 void Document::AnnotationSetStyles(Sci::Line line
, const unsigned char *styles
) {
2352 if (line
>= 0 && line
< LinesTotal()) {
2353 Annotations()->SetStyles(line
, styles
);
2357 int Document::AnnotationLines(Sci::Line line
) const {
2358 return Annotations()->Lines(line
);
2361 void Document::AnnotationClearAll() {
2362 const Sci::Line maxEditorLine
= LinesTotal();
2363 for (Sci::Line l
=0; l
<maxEditorLine
; l
++)
2364 AnnotationSetText(l
, nullptr);
2365 // Free remaining data
2366 Annotations()->ClearAll();
2369 void Document::IncrementStyleClock() noexcept
{
2370 styleClock
= (styleClock
+ 1) % 0x100000;
2373 void SCI_METHOD
Document::DecorationSetCurrentIndicator(int indicator
) {
2374 decorations
->SetCurrentIndicator(indicator
);
// Ask decorations->FillRange() to fill `fillLength` bytes starting at
// `position` with `value`, then build a DocModification flagged
// SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER from the position and length
// reported back in the FillResult (which may differ from the request).
// NOTE(review): any condition guarding the notification, and the statement
// that consumes `mh`, are not visible in this view.
2377 void SCI_METHOD
Document::DecorationFillRange(Sci_Position position
, int value
, Sci_Position fillLength
) {
2378 const FillResult
<Sci::Position
> fr
= decorations
->FillRange(
2379 position
, value
, fillLength
);
2381 const DocModification
mh(SC_MOD_CHANGEINDICATOR
| SC_PERFORMED_USER
,
2382 fr
.position
, fr
.fillLength
);
// Register the (watcher, userData) pair in `watchers`.
// The list is first searched with std::find for an equal pair; the pair is
// appended with push_back after that check.
// NOTE(review): the statement executed when the pair is already present and
// the return statements are not visible in this view.
2387 bool Document::AddWatcher(DocWatcher
*watcher
, void *userData
) {
2388 const WatcherWithUserData
wwud(watcher
, userData
);
2389 std::vector
<WatcherWithUserData
>::iterator it
=
2390 std::find(watchers
.begin(), watchers
.end(), wwud
);
2391 if (it
!= watchers
.end())
2393 watchers
.push_back(wwud
);
// Look up the (watcher, userData) pair in `watchers` with std::find and act
// on it when found.
// NOTE(review): the body of the "found" branch and the return statements are
// not visible in this view.
2397 bool Document::RemoveWatcher(DocWatcher
*watcher
, void *userData
) {
2398 std::vector
<WatcherWithUserData
>::iterator it
=
2399 std::find(watchers
.begin(), watchers
.end(), WatcherWithUserData(watcher
, userData
));
2400 if (it
!= watchers
.end()) {
2407 void Document::NotifyModifyAttempt() {
2408 for (const WatcherWithUserData
&watcher
: watchers
) {
2409 watcher
.watcher
->NotifyModifyAttempt(this, watcher
.userData
);
2413 void Document::NotifySavePoint(bool atSavePoint
) {
2414 for (const WatcherWithUserData
&watcher
: watchers
) {
2415 watcher
.watcher
->NotifySavePoint(this, watcher
.userData
, atSavePoint
);
2419 void Document::NotifyModified(DocModification mh
) {
2420 if (mh
.modificationType
& SC_MOD_INSERTTEXT
) {
2421 decorations
->InsertSpace(mh
.position
, mh
.length
);
2422 } else if (mh
.modificationType
& SC_MOD_DELETETEXT
) {
2423 decorations
->DeleteRange(mh
.position
, mh
.length
);
2425 for (const WatcherWithUserData
&watcher
: watchers
) {
2426 watcher
.watcher
->NotifyModified(this, mh
, watcher
.userData
);
2430 // Used for word part navigation.
2431 static bool IsASCIIPunctuationCharacter(unsigned int ch
) noexcept
{
2471 bool Document::IsWordPartSeparator(unsigned int ch
) const {
2472 return (WordCharacterClass(ch
) == CharClassify::ccWord
) && IsASCIIPunctuationCharacter(ch
);
// Word part navigation towards the start of the document.
// Starting from `pos`, the code steps back one character and then skips
// backwards over a run of characters of the same kind as the character found
// there. The kinds tested, in order, are: word part separators, lower case,
// upper case, digits, ASCII punctuation, space characters and non-ASCII.
// After most runs `pos` is moved forward by one character again when the
// character it stopped on does not belong to the run.
// NOTE(review): the enclosing `pos > 0` guards and the final return are not
// visible in this view.
2475 Sci::Position
Document::WordPartLeft(Sci::Position pos
) const {
2477 pos
-= CharacterBefore(pos
).widthBytes
;
2478 CharacterExtracted ceStart
= CharacterAfter(pos
);
// Skip back over any run of separators first.
2479 if (IsWordPartSeparator(ceStart
.character
)) {
2480 while (pos
> 0 && IsWordPartSeparator(CharacterAfter(pos
).character
)) {
2481 pos
-= CharacterBefore(pos
).widthBytes
;
// Re-read the starting character and classify the run that precedes it.
2485 ceStart
= CharacterAfter(pos
);
2486 pos
-= CharacterBefore(pos
).widthBytes
;
2487 if (IsLowerCase(ceStart
.character
)) {
2488 while (pos
> 0 && IsLowerCase(CharacterAfter(pos
).character
))
2489 pos
-= CharacterBefore(pos
).widthBytes
;
// A lower case run may keep one leading upper case character (as in "Word").
2490 if (!IsUpperCase(CharacterAfter(pos
).character
) && !IsLowerCase(CharacterAfter(pos
).character
))
2491 pos
+= CharacterAfter(pos
).widthBytes
;
2492 } else if (IsUpperCase(ceStart
.character
)) {
2493 while (pos
> 0 && IsUpperCase(CharacterAfter(pos
).character
))
2494 pos
-= CharacterBefore(pos
).widthBytes
;
2495 if (!IsUpperCase(CharacterAfter(pos
).character
))
2496 pos
+= CharacterAfter(pos
).widthBytes
;
2497 } else if (IsADigit(ceStart
.character
)) {
2498 while (pos
> 0 && IsADigit(CharacterAfter(pos
).character
))
2499 pos
-= CharacterBefore(pos
).widthBytes
;
2500 if (!IsADigit(CharacterAfter(pos
).character
))
2501 pos
+= CharacterAfter(pos
).widthBytes
;
2502 } else if (IsASCIIPunctuationCharacter(ceStart
.character
)) {
2503 while (pos
> 0 && IsASCIIPunctuationCharacter(CharacterAfter(pos
).character
))
2504 pos
-= CharacterBefore(pos
).widthBytes
;
2505 if (!IsASCIIPunctuationCharacter(CharacterAfter(pos
).character
))
2506 pos
+= CharacterAfter(pos
).widthBytes
;
2507 } else if (isspacechar(ceStart
.character
)) {
2508 while (pos
> 0 && isspacechar(CharacterAfter(pos
).character
))
2509 pos
-= CharacterBefore(pos
).widthBytes
;
2510 if (!isspacechar(CharacterAfter(pos
).character
))
2511 pos
+= CharacterAfter(pos
).widthBytes
;
2512 } else if (!IsASCII(ceStart
.character
)) {
2513 while (pos
> 0 && !IsASCII(CharacterAfter(pos
).character
))
2514 pos
-= CharacterBefore(pos
).widthBytes
;
2515 if (IsASCII(CharacterAfter(pos
).character
))
2516 pos
+= CharacterAfter(pos
).widthBytes
;
2518 pos
+= CharacterAfter(pos
).widthBytes
;
// Word part navigation towards the end of the document.
// Any run of word part separators at `pos` is skipped first, then `pos` is
// advanced over a run of characters of the same kind as the character found
// there. The kinds tested, in order, are: non-ASCII, lower case, upper case,
// digits, ASCII punctuation and space characters. All loops are bounded by
// the document length.
// NOTE(review): the final return is not visible in this view.
2525 Sci::Position
Document::WordPartRight(Sci::Position pos
) const {
2526 CharacterExtracted ceStart
= CharacterAfter(pos
);
2527 const Sci::Position length
= Length();
2528 if (IsWordPartSeparator(ceStart
.character
)) {
2529 while (pos
< length
&& IsWordPartSeparator(CharacterAfter(pos
).character
))
2530 pos
+= CharacterAfter(pos
).widthBytes
;
2531 ceStart
= CharacterAfter(pos
);
2533 if (!IsASCII(ceStart
.character
)) {
2534 while (pos
< length
&& !IsASCII(CharacterAfter(pos
).character
))
2535 pos
+= CharacterAfter(pos
).widthBytes
;
2536 } else if (IsLowerCase(ceStart
.character
)) {
2537 while (pos
< length
&& IsLowerCase(CharacterAfter(pos
).character
))
2538 pos
+= CharacterAfter(pos
).widthBytes
;
2539 } else if (IsUpperCase(ceStart
.character
)) {
// Upper case followed by lower case: treat as a capitalised word ("Word").
2540 if (IsLowerCase(CharacterAfter(pos
+ ceStart
.widthBytes
).character
)) {
2541 pos
+= CharacterAfter(pos
).widthBytes
;
2542 while (pos
< length
&& IsLowerCase(CharacterAfter(pos
).character
))
2543 pos
+= CharacterAfter(pos
).widthBytes
;
// Otherwise consume the run of upper case characters.
2545 while (pos
< length
&& IsUpperCase(CharacterAfter(pos
).character
))
2546 pos
+= CharacterAfter(pos
).widthBytes
;
// If the upper case run is followed by lower case, give back its last
// character so that it can start the next word part.
2548 if (IsLowerCase(CharacterAfter(pos
).character
) && IsUpperCase(CharacterBefore(pos
).character
))
2549 pos
-= CharacterBefore(pos
).widthBytes
;
2550 } else if (IsADigit(ceStart
.character
)) {
2551 while (pos
< length
&& IsADigit(CharacterAfter(pos
).character
))
2552 pos
+= CharacterAfter(pos
).widthBytes
;
2553 } else if (IsASCIIPunctuationCharacter(ceStart
.character
)) {
2554 while (pos
< length
&& IsASCIIPunctuationCharacter(CharacterAfter(pos
).character
))
2555 pos
+= CharacterAfter(pos
).widthBytes
;
2556 } else if (isspacechar(ceStart
.character
)) {
2557 while (pos
< length
&& isspacechar(CharacterAfter(pos
).character
))
2558 pos
+= CharacterAfter(pos
).widthBytes
;
2560 pos
+= CharacterAfter(pos
).widthBytes
;
/** True when @a c is a carriage return or a line feed. */
static constexpr bool IsLineEndChar(char c) noexcept {
	return c == '\r' || c == '\n';
}
// Extend from `pos` over characters that carry the same style as the
// character at the starting position (sStart). When `singleLine` is set the
// scan also stops at a line end character.
// Two scan conditions are visible: one bounded below by pos > 0 and one
// bounded above by Length().
// NOTE(review): presumably the sign of `delta` selects between them; the
// selection, the loop bodies and the return are not visible in this view.
2569 Sci::Position
Document::ExtendStyleRange(Sci::Position pos
, int delta
, bool singleLine
) {
2570 const int sStart
= cb
.StyleAt(pos
);
2572 while (pos
> 0 && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
2576 while (pos
< (Length()) && (cb
.StyleAt(pos
) == sStart
) && (!singleLine
|| !IsLineEndChar(cb
.CharAt(pos
))))
2582 static char BraceOpposite(char ch
) noexcept
{
// Search for the brace matching the one at `position`.
// The character to look for is BraceOpposite() of the character at
// `position`. The scan moves with NextPosition(position, direction) while the
// position stays inside [0, Length()). A character is only compared against
// the starting brace and the sought brace when it lies beyond GetEndStyled()
// or has the same style index as the starting brace. The scan also detects
// NextPosition() failing to move.
// The second parameter (maxReStyle) is unnamed and unused.
// NOTE(review): the statements that set `direction`, track nesting and return
// are not visible in this view.
2605 // TODO: should be able to extend styled region to find matching brace
2606 Sci::Position
Document::BraceMatch(Sci::Position position
, Sci::Position
/*maxReStyle*/) {
2607 const char chBrace
= CharAt(position
);
2608 const char chSeek
= BraceOpposite(chBrace
);
2611 const int styBrace
= StyleIndexAt(position
);
// Opening braces are distinguished from closing ones here.
2613 if (chBrace
== '(' || chBrace
== '[' || chBrace
== '{' || chBrace
== '<')
2616 position
= NextPosition(position
, direction
);
2617 while ((position
>= 0) && (position
< Length())) {
2618 const char chAtPos
= CharAt(position
);
2619 const int styAtPos
= StyleIndexAt(position
);
// Only unstyled text or text in the same style as the brace takes part.
2620 if ((position
> GetEndStyled()) || (styAtPos
== styBrace
)) {
2621 if (chAtPos
== chBrace
)
2623 if (chAtPos
== chSeek
)
// Guard against NextPosition() not advancing.
2628 const Sci::Position positionBeforeMove
= position
;
2629 position
= NextPosition(position
, direction
);
2630 if (position
== positionBeforeMove
)
2637 * Implementation of RegexSearchBase for the default built-in regular expression engine
// RegexSearchBase implementation declared here with FindText() and
// SubstituteByPosition() overrides. Copying and moving are deleted.
// The constructor passes the character classification table to the `search`
// member; `substituted` is a std::string member.
// NOTE(review): the access specifiers and the declaration of `search` are not
// visible in this view.
2639 class BuiltinRegex
: public RegexSearchBase
{
2641 explicit BuiltinRegex(CharClassify
*charClassTable
) : search(charClassTable
) {}
2642 BuiltinRegex(const BuiltinRegex
&) = delete;
2643 BuiltinRegex(BuiltinRegex
&&) = delete;
2644 BuiltinRegex
&operator=(const BuiltinRegex
&) = delete;
2645 BuiltinRegex
&operator=(BuiltinRegex
&&) = delete;
2646 ~BuiltinRegex() override
= default;
2648 Sci::Position
FindText(Document
*doc
, Sci::Position minPos
, Sci::Position maxPos
, const char *s
,
2649 bool caseSensitive
, bool word
, bool wordStart
, int flags
,
2650 Sci::Position
*length
) override
;
2652 const char *SubstituteByPosition(Document
*doc
, const char *text
, Sci::Position
*length
) override
;
2656 std::string substituted
;
2662 * RESearchRange keeps track of search range.
// Describes the range a regular expression search runs over.
// `increment` is 1 when minPos <= maxPos and -1 otherwise, so a search with
// minPos > maxPos runs backwards. startPos/endPos are minPos/maxPos after
// MovePositionOutsideChar(); lineRangeStart/lineRangeEnd are the lines
// containing them, and lineRangeBreak is one step (in the search direction)
// past lineRangeEnd, used as the loop terminator.
// NOTE(review): the declaration of `increment` and some statements in
// LineRange() are not visible in this view.
2664 class RESearchRange
{
2666 const Document
*doc
;
2668 Sci::Position startPos
;
2669 Sci::Position endPos
;
2670 Sci::Line lineRangeStart
;
2671 Sci::Line lineRangeEnd
;
2672 Sci::Line lineRangeBreak
;
2673 RESearchRange(const Document
*doc_
, Sci::Position minPos
, Sci::Position maxPos
) : doc(doc_
) {
2674 increment
= (minPos
<= maxPos
) ? 1 : -1;
2676 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2677 // but just in case, move them.
2678 startPos
= doc
->MovePositionOutsideChar(minPos
, 1, true);
2679 endPos
= doc
->MovePositionOutsideChar(maxPos
, 1, true);
2681 lineRangeStart
= doc
->SciLineFromPosition(startPos
);
2682 lineRangeEnd
= doc
->SciLineFromPosition(endPos
);
2683 lineRangeBreak
= lineRangeEnd
+ increment
;
// Return the part of `line` that lies inside the search range: the whole
// line, clipped on the first and last lines of the range.
2685 Range
LineRange(Sci::Line line
) const {
2686 Range
range(doc
->LineStart(line
), doc
->LineEnd(line
));
2687 if (increment
== 1) {
2688 if (line
== lineRangeStart
)
2689 range
.start
= startPos
;
2690 if (line
== lineRangeEnd
)
// Backwards search: endPos is the lower bound and startPos the upper one.
2693 if (line
== lineRangeEnd
)
2694 range
.start
= endPos
;
2695 if (line
== lineRangeStart
)
2696 range
.end
= startPos
;
// CharacterIndexer implementation constructed from a Document pointer and an
// end position. Copying and moving are deleted.
// CharAt() tests whether the index is outside [0, end) before reading the
// character from the document.
// NOTE(review): the member declarations and the value returned for an
// out-of-range index are not visible in this view.
2702 // Define a way for the Regular Expression code to access the document
2703 class DocumentIndexer
: public CharacterIndexer
{
2707 DocumentIndexer(Document
*pdoc_
, Sci::Position end_
) noexcept
:
2708 pdoc(pdoc_
), end(end_
) {
2711 DocumentIndexer(const DocumentIndexer
&) = delete;
2712 DocumentIndexer(DocumentIndexer
&&) = delete;
2713 DocumentIndexer
&operator=(const DocumentIndexer
&) = delete;
2714 DocumentIndexer
&operator=(DocumentIndexer
&&) = delete;
2716 ~DocumentIndexer() override
= default;
2718 char CharAt(Sci::Position index
) const noexcept override
{
2719 if (index
< 0 || index
>= end
)
2722 return pdoc
->CharAt(index
);
2726 #ifndef NO_CXX11_REGEX
// Iterator over the bytes of a Document, declaring the member typedefs of a
// bidirectional iterator with value type char.
// The iterator is a (document, byte position) pair; dereferencing reads
// doc->CharAt(position) and two iterators compare equal when both the
// document and the position match.
// NOTE(review): the bodies of the increment/decrement operators and of
// Pos()/PosRoundUp(), and the `doc` assignments in the copy operations, are
// not visible in this view.
2728 class ByteIterator
{
2730 typedef std::bidirectional_iterator_tag iterator_category
;
2731 typedef char value_type
;
2732 typedef ptrdiff_t difference_type
;
2733 typedef char* pointer
;
2734 typedef char& reference
;
2736 const Document
*doc
;
2737 Sci::Position position
;
2739 ByteIterator(const Document
*doc_
=nullptr, Sci::Position position_
=0) noexcept
:
2740 doc(doc_
), position(position_
) {
2742 ByteIterator(const ByteIterator
&other
) noexcept
{
2744 position
= other
.position
;
2746 ByteIterator(ByteIterator
&&other
) noexcept
{
2748 position
= other
.position
;
2750 ByteIterator
&operator=(const ByteIterator
&other
) noexcept
{
2751 if (this != &other
) {
2753 position
= other
.position
;
2757 ByteIterator
&operator=(ByteIterator
&&) noexcept
= default;
2758 ~ByteIterator() = default;
2759 char operator*() const noexcept
{
2760 return doc
->CharAt(position
);
2762 ByteIterator
&operator++() noexcept
{
2766 ByteIterator
operator++(int) noexcept
{
2767 ByteIterator
retVal(*this);
2771 ByteIterator
&operator--() noexcept
{
2775 bool operator==(const ByteIterator
&other
) const noexcept
{
2776 return doc
== other
.doc
&& position
== other
.position
;
2778 bool operator!=(const ByteIterator
&other
) const noexcept
{
2779 return doc
!= other
.doc
|| position
!= other
.position
;
2781 Sci::Position
Pos() const noexcept
{
2784 Sci::Position
PosRoundUp() const noexcept
{
2789 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2790 // Would be better to use sizeof(wchar_t) or similar to differentiate
2791 // but easier for now to hard-code platforms.
2792 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2793 // appear to allow specializing basic_regex over these.
2796 #define WCHAR_T_IS_16 1
2798 #define WCHAR_T_IS_16 0
// UTF8Iterator variant that yields wchar_t values one UTF-16 unit at a time.
// The iterator position is the triple (doc, position, characterIndex):
// `position` is the byte position of the current character and
// `characterIndex` selects which element of `buffered` is current.
// lenBytes, lenCharacters and buffered are caches filled by ReadCharacter().
// NOTE(review): several statements (the `doc` assignments in the copy
// operations, characterIndex updates, ReadCharacter() calls, returns) are not
// visible in this view.
2803 // On Windows, report non-BMP characters as 2 separate surrogates as that
2804 // matches wregex since it is based on wchar_t.
2805 class UTF8Iterator
{
2806 // These 3 fields determine the iterator position and are used for comparisons
2807 const Document
*doc
;
2808 Sci::Position position
;
2809 size_t characterIndex
;
2810 // Remaining fields are derived from the determining fields so are excluded in comparisons
2811 unsigned int lenBytes
;
2812 size_t lenCharacters
;
2813 wchar_t buffered
[2];
2815 typedef std::bidirectional_iterator_tag iterator_category
;
2816 typedef wchar_t value_type
;
2817 typedef ptrdiff_t difference_type
;
2818 typedef wchar_t* pointer
;
2819 typedef wchar_t& reference
;
2821 UTF8Iterator(const Document
*doc_
=nullptr, Sci::Position position_
=0) noexcept
:
2822 doc(doc_
), position(position_
), characterIndex(0), lenBytes(0), lenCharacters(0), buffered
{} {
2829 UTF8Iterator(const UTF8Iterator
&other
) noexcept
: buffered
{} {
2831 position
= other
.position
;
2832 characterIndex
= other
.characterIndex
;
2833 lenBytes
= other
.lenBytes
;
2834 lenCharacters
= other
.lenCharacters
;
2835 buffered
[0] = other
.buffered
[0];
2836 buffered
[1] = other
.buffered
[1];
2838 UTF8Iterator(UTF8Iterator
&&other
) noexcept
= default;
2839 UTF8Iterator
&operator=(const UTF8Iterator
&other
) noexcept
{
2840 if (this != &other
) {
2842 position
= other
.position
;
2843 characterIndex
= other
.characterIndex
;
2844 lenBytes
= other
.lenBytes
;
2845 lenCharacters
= other
.lenCharacters
;
2846 buffered
[0] = other
.buffered
[0];
2847 buffered
[1] = other
.buffered
[1];
2851 UTF8Iterator
&operator=(UTF8Iterator
&&) noexcept
= default;
2852 ~UTF8Iterator() = default;
// Dereference: the current unit of the buffered character.
2853 wchar_t operator*() const noexcept
{
2854 assert(lenCharacters
!= 0);
2855 return buffered
[characterIndex
];
// Pre-increment: tests whether another buffered unit remains; the visible
// alternative advances `position` by the byte width of the character.
2857 UTF8Iterator
&operator++() noexcept
{
2858 if ((characterIndex
+ 1) < (lenCharacters
)) {
2861 position
+= lenBytes
;
// Post-increment: same test and advance as pre-increment, after copying
// the iterator into retVal.
2867 UTF8Iterator
operator++(int) noexcept
{
2868 UTF8Iterator
retVal(*this);
2869 if ((characterIndex
+ 1) < (lenCharacters
)) {
2872 position
+= lenBytes
;
// Pre-decrement: tests characterIndex; the visible alternative moves
// `position` to the previous character and selects its last unit.
2878 UTF8Iterator
&operator--() noexcept
{
2879 if (characterIndex
) {
2882 position
= doc
->NextPosition(position
, -1);
2884 characterIndex
= lenCharacters
- 1;
2888 bool operator==(const UTF8Iterator
&other
) const noexcept
{
2889 // Only test the determining fields, not the character widths and values derived from this
2890 return doc
== other
.doc
&&
2891 position
== other
.position
&&
2892 characterIndex
== other
.characterIndex
;
2894 bool operator!=(const UTF8Iterator
&other
) const noexcept
{
2895 // Only test the determining fields, not the character widths and values derived from this
2896 return doc
!= other
.doc
||
2897 position
!= other
.position
||
2898 characterIndex
!= other
.characterIndex
;
2900 Sci::Position
Pos() const noexcept
{
2903 Sci::Position
PosRoundUp() const noexcept
{
2905 return position
+ lenBytes
; // Force to end of character
// Extract the character at `position` and refresh the cached fields.
// The unicodeReplacementChar case stores the value directly as one unit;
// otherwise UTF16FromUTF32Character() fills `buffered` and gives the count.
2910 void ReadCharacter() noexcept
{
2911 const Document::CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
2912 lenBytes
= charExtracted
.widthBytes
;
2913 if (charExtracted
.character
== unicodeReplacementChar
) {
2915 buffered
[0] = static_cast<wchar_t>(charExtracted
.character
);
2917 lenCharacters
= UTF16FromUTF32Character(charExtracted
.character
, buffered
);
// UTF8Iterator variant that yields one wchar_t per document character.
// The iterator is a (document, byte position) pair. Dereferencing extracts
// the character at `position`; increment and decrement move `position` with
// doc->NextPosition() by +1 / -1 characters.
// NOTE(review): the `doc` assignments in the copy operations, the return
// statements and the bodies of Pos()/PosRoundUp() are not visible in this
// view.
2924 // On Unix, report non-BMP characters as single characters
2926 class UTF8Iterator
{
2927 const Document
*doc
;
2928 Sci::Position position
;
2930 typedef std::bidirectional_iterator_tag iterator_category
;
2931 typedef wchar_t value_type
;
2932 typedef ptrdiff_t difference_type
;
2933 typedef wchar_t* pointer
;
2934 typedef wchar_t& reference
;
2936 UTF8Iterator(const Document
*doc_
=nullptr, Sci::Position position_
=0) noexcept
:
2937 doc(doc_
), position(position_
) {
2939 UTF8Iterator(const UTF8Iterator
&other
) noexcept
{
2941 position
= other
.position
;
2943 UTF8Iterator(UTF8Iterator
&&other
) noexcept
= default;
2944 UTF8Iterator
&operator=(const UTF8Iterator
&other
) noexcept
{
2945 if (this != &other
) {
2947 position
= other
.position
;
2951 UTF8Iterator
&operator=(UTF8Iterator
&&) noexcept
= default;
2952 ~UTF8Iterator() = default;
2953 wchar_t operator*() const noexcept
{
2954 const Document::CharacterExtracted charExtracted
= doc
->ExtractCharacter(position
);
2955 return charExtracted
.character
;
2957 UTF8Iterator
&operator++() noexcept
{
2958 position
= doc
->NextPosition(position
, 1);
2961 UTF8Iterator
operator++(int) noexcept
{
2962 UTF8Iterator
retVal(*this);
2963 position
= doc
->NextPosition(position
, 1);
2966 UTF8Iterator
&operator--() noexcept
{
2967 position
= doc
->NextPosition(position
, -1);
2970 bool operator==(const UTF8Iterator
&other
) const noexcept
{
2971 return doc
== other
.doc
&& position
== other
.position
;
2973 bool operator!=(const UTF8Iterator
&other
) const noexcept
{
2974 return doc
!= other
.doc
|| position
!= other
.position
;
2976 Sci::Position
Pos() const noexcept
{
2979 Sci::Position
PosRoundUp() const noexcept
{
// Compute std::regex match flags for a search over [startPos, endPos).
// Starting from match_default, match_not_bol is added when startPos is not a
// line start position and match_not_eol when endPos is not a line end
// position, so ^ and $ only match at real line boundaries.
// NOTE(review): the return statement is not visible in this view.
2986 std::regex_constants::match_flag_type
MatchFlags(const Document
*doc
, Sci::Position startPos
, Sci::Position endPos
) {
2987 std::regex_constants::match_flag_type flagsMatch
= std::regex_constants::match_default
;
2988 if (!doc
->IsLineStartPosition(startPos
))
2989 flagsMatch
|= std::regex_constants::match_not_bol
;
2990 if (!doc
->IsLineEndPosition(endPos
))
2991 flagsMatch
|= std::regex_constants::match_not_eol
;
// Run a std::regex search over the range described by `resr`, using
// `Iterator` to walk the document, and copy the result into `search`.
// Two strategies are visible: with REGEX_MULTILINE defined the whole range is
// searched in one call; otherwise lines are searched one at a time in the
// direction given by resr.increment. For a backwards search
// (increment == -1) the search is repeated after each match to find the last
// match on the line.
// On success every sub-match is stored: start in search.bopat, end in
// search.eopat and the matched bytes in search.pat.
// NOTE(review): the third parameter name is mis-encoded below as "(R)egexp"
// (an HTML-entity artefact); it is referred to as `regexp` in the body.
// NOTE(review): several control-flow statements and the return are not
// visible in this view.
2995 template<typename Iterator
, typename Regex
>
2996 bool MatchOnLines(const Document
*doc
, const Regex
®exp
, const RESearchRange
&resr
, RESearch
&search
) {
2997 std::match_results
<Iterator
> match
;
2999 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3000 // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3001 // The std::regex::multiline option was added to C++17 to improve behaviour but
3002 // has not been implemented by compiler runtimes with MSVC always in multiline
3003 // mode and libc++ and libstdc++ always in single-line mode.
3004 // If multiline regex worked well then the line by line iteration could be removed
3005 // for the forwards case and replaced with the following 4 lines:
3006 #ifdef REGEX_MULTILINE
3007 Iterator
itStart(doc
, resr
.startPos
);
3008 Iterator
itEnd(doc
, resr
.endPos
);
3009 const std::regex_constants::match_flag_type flagsMatch
= MatchFlags(doc
, resr
.startPos
, resr
.endPos
);
3010 const bool matched
= std::regex_search(itStart
, itEnd
, match
, regexp
, flagsMatch
);
3013 bool matched
= false;
3014 for (Sci::Line line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
3015 const Range lineRange
= resr
.LineRange(line
);
3016 Iterator
itStart(doc
, lineRange
.start
);
3017 Iterator
itEnd(doc
, lineRange
.end
);
3018 std::regex_constants::match_flag_type flagsMatch
= MatchFlags(doc
, lineRange
.start
, lineRange
.end
);
3019 matched
= std::regex_search(itStart
, itEnd
, match
, regexp
, flagsMatch
);
3020 // Check for the last match on this line.
3022 if (resr
.increment
== -1) {
// Continue searching from the end of the current match.
3024 Iterator
itNext(doc
, match
[0].second
.PosRoundUp());
3025 flagsMatch
= MatchFlags(doc
, itNext
.Pos(), lineRange
.end
);
3026 std::match_results
<Iterator
> matchNext
;
3027 matched
= std::regex_search(itNext
, itEnd
, matchNext
, regexp
, flagsMatch
);
3029 if (match
[0].first
== match
[0].second
) {
3030 // Empty match means failure so exit
// Publish positions and text of the overall match and each sub-match.
3043 for (size_t co
= 0; co
< match
.size(); co
++) {
3044 search
.bopat
[co
] = match
[co
].first
.Pos();
3045 search
.eopat
[co
] = match
[co
].second
.PosRoundUp();
3046 const Sci::Position lenMatch
= search
.eopat
[co
] - search
.bopat
[co
];
3047 search
.pat
[co
].resize(lenMatch
);
3048 for (Sci::Position iPos
= 0; iPos
< lenMatch
; iPos
++) {
3049 search
.pat
[co
][iPos
] = doc
->CharAt(iPos
+ search
.bopat
[co
]);
// Search the document between minPos and maxPos for the regular expression
// `s` using std::regex with ECMAScript syntax.
// For a UTF-8 document (dbcsCodePage == SC_CP_UTF8) the pattern is converted
// to a wide string and matched through UTF8Iterator; otherwise the pattern is
// used as bytes and matched through ByteIterator.
// On a match the start position is taken from search.bopat[0] and *length is
// set to the length of the match; posMatch starts as -1.
// std::regex_error (an invalid pattern) is caught, as are other failures.
// NOTE(review): the icase condition, the regex object declarations, the
// values returned from the catch handlers and the final return are not
// visible in this view.
3056 Sci::Position
Cxx11RegexFindText(const Document
*doc
, Sci::Position minPos
, Sci::Position maxPos
, const char *s
,
3057 bool caseSensitive
, Sci::Position
*length
, RESearch
&search
) {
3058 const RESearchRange
resr(doc
, minPos
, maxPos
);
3061 std::regex::flag_type flagsRe
= std::regex::ECMAScript
;
3062 // Flags that appear to have no effect:
3063 // | std::regex::collate | std::regex::extended;
3065 flagsRe
= flagsRe
| std::regex::icase
;
3067 // Clear the RESearch so can fill in matches
3070 bool matched
= false;
3071 if (SC_CP_UTF8
== doc
->dbcsCodePage
) {
3072 const std::wstring ws
= WStringFromUTF8(s
, strlen(s
));
3074 regexp
.assign(ws
, flagsRe
);
3075 matched
= MatchOnLines
<UTF8Iterator
>(doc
, regexp
, resr
, search
);
3079 regexp
.assign(s
, flagsRe
);
3080 matched
= MatchOnLines
<ByteIterator
>(doc
, regexp
, resr
, search
);
3083 Sci::Position posMatch
= -1;
3085 posMatch
= search
.bopat
[0];
3086 *length
= search
.eopat
[0] - search
.bopat
[0];
3088 // Example - search in doc/ScintillaHistory.html for
3089 // [[:upper:]]eta[[:space:]]
3090 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3091 //const double durSearch = ep.Duration(true);
3092 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3094 } catch (std::regex_error
&) {
3095 // Failed to create regular expression
3098 // Failed in some other way
// Regular expression search between minPos and maxPos.
// With SCFIND_CXX11REGEX set in `flags` (and C++11 regex support compiled in)
// the work is delegated to Cxx11RegexFindText(). Otherwise the pattern is
// compiled with the built-in RESearch engine and executed one line at a time,
// forwards or backwards according to the search range.
// A pattern starting with '^' or ending with an unescaped '$' is anchored:
// the first/last line of the range is skipped when the range does not begin
// or end on the corresponding line boundary.
// For backwards searches the line is searched repeatedly to find the last
// match, with a cap of 1000 repetitions.
// The `word` and `wordStart` parameters are unnamed and unused.
// NOTE(review): s[*length - 1] is read unconditionally; confirm that a zero
// *length cannot reach this point (it would index before the buffer).
// NOTE(review): handling of the compile error message, several `else`
// branches and the final return are not visible in this view.
3107 Sci::Position
BuiltinRegex::FindText(Document
*doc
, Sci::Position minPos
, Sci::Position maxPos
, const char *s
,
3108 bool caseSensitive
, bool, bool, int flags
,
3109 Sci::Position
*length
) {
3111 #ifndef NO_CXX11_REGEX
3112 if (flags
& SCFIND_CXX11REGEX
) {
3113 return Cxx11RegexFindText(doc
, minPos
, maxPos
, s
,
3114 caseSensitive
, length
, search
);
3118 const RESearchRange
resr(doc
, minPos
, maxPos
);
3120 const bool posix
= (flags
& SCFIND_POSIX
) != 0;
3122 const char *errmsg
= search
.Compile(s
, *length
, caseSensitive
, posix
);
3126 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3127 // Replace first '.' with '-' in each property file variable reference:
3128 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3129 // Replace: $(\1-\2)
3130 Sci::Position pos
= -1;
3131 Sci::Position lenRet
= 0;
3132 const bool searchforLineStart
= s
[0] == '^';
3133 const char searchEnd
= s
[*length
- 1];
3134 const char searchEndPrev
= (*length
> 1) ? s
[*length
- 2] : '\0';
// A trailing '$' only anchors to the line end when it is not escaped.
3135 const bool searchforLineEnd
= (searchEnd
== '$') && (searchEndPrev
!= '\\');
3136 for (Sci::Line line
= resr
.lineRangeStart
; line
!= resr
.lineRangeBreak
; line
+= resr
.increment
) {
3137 Sci::Position startOfLine
= doc
->LineStart(line
);
3138 Sci::Position endOfLine
= doc
->LineEnd(line
);
// Forward search: clip the first and last lines to the search range.
3139 if (resr
.increment
== 1) {
3140 if (line
== resr
.lineRangeStart
) {
3141 if ((resr
.startPos
!= startOfLine
) && searchforLineStart
)
3142 continue; // Can't match start of line if start position after start of line
3143 startOfLine
= resr
.startPos
;
3145 if (line
== resr
.lineRangeEnd
) {
3146 if ((resr
.endPos
!= endOfLine
) && searchforLineEnd
)
3147 continue; // Can't match end of line if end position before end of line
3148 endOfLine
= resr
.endPos
;
// Backward search: endPos is the lower bound and startPos the upper one.
3151 if (line
== resr
.lineRangeEnd
) {
3152 if ((resr
.endPos
!= startOfLine
) && searchforLineStart
)
3153 continue; // Can't match start of line if end position after start of line
3154 startOfLine
= resr
.endPos
;
3156 if (line
== resr
.lineRangeStart
) {
3157 if ((resr
.startPos
!= endOfLine
) && searchforLineEnd
)
3158 continue; // Can't match end of line if start position before end of line
3159 endOfLine
= resr
.startPos
;
3163 const DocumentIndexer
di(doc
, endOfLine
);
3164 int success
= search
.Execute(di
, startOfLine
, endOfLine
);
3166 pos
= search
.bopat
[0];
3167 // Ensure only whole characters selected
3168 search
.eopat
[0] = doc
->MovePositionOutsideChar(search
.eopat
[0], 1, false);
3169 lenRet
= search
.eopat
[0] - search
.bopat
[0];
3170 // There can be only one start of a line, so no need to look for last match in line
3171 if ((resr
.increment
== -1) && !searchforLineStart
) {
3172 // Check for the last match on this line.
3173 int repetitions
= 1000; // Break out of infinite loop
3174 while (success
&& (search
.eopat
[0] <= endOfLine
) && (repetitions
--)) {
3175 success
= search
.Execute(di
, pos
+1, endOfLine
);
// Accept the later match only if it ends at or before minPos.
3177 if (search
.eopat
[0] <= minPos
) {
3178 pos
= search
.bopat
[0];
3179 lenRet
= search
.eopat
[0] - search
.bopat
[0];
// Expand a replacement string into `substituted` after a successful search.
// The matched sub-expressions are first captured from the document with
// search.GrabMatches(). In `text`, a backslash followed by a digit 0-9 is
// replaced by the text of that sub-match (nothing when that sub-match is
// empty); other backslash sequences emit the control characters
// \a \b \f \n \r \t \v or a backslash; all other characters are copied.
// On return *length is the length of the expanded text and the returned
// pointer refers to storage owned by this object (substituted.c_str()), so it
// is only valid until `substituted` is next modified.
// NOTE(review): text[j + 1] is read even when j is the last index below
// *length; this relies on a readable byte (e.g. a NUL terminator) following
// the text - confirm with callers.
// NOTE(review): the switch labels and index adjustments are not visible in
// this view.
3193 const char *BuiltinRegex::SubstituteByPosition(Document
*doc
, const char *text
, Sci::Position
*length
) {
3194 substituted
.clear();
3195 const DocumentIndexer
di(doc
, doc
->Length());
3196 search
.GrabMatches(di
);
3197 for (Sci::Position j
= 0; j
< *length
; j
++) {
3198 if (text
[j
] == '\\') {
3199 if (text
[j
+ 1] >= '0' && text
[j
+ 1] <= '9') {
3200 const unsigned int patNum
= text
[j
+ 1] - '0';
3201 const Sci::Position len
= search
.eopat
[patNum
] - search
.bopat
[patNum
];
3202 if (!search
.pat
[patNum
].empty()) // Will be null if try for a match that did not occur
3203 substituted
.append(search
.pat
[patNum
].c_str(), len
);
3209 substituted
.push_back('\a');
3212 substituted
.push_back('\b');
3215 substituted
.push_back('\f');
3218 substituted
.push_back('\n');
3221 substituted
.push_back('\r');
3224 substituted
.push_back('\t');
3227 substituted
.push_back('\v');
3230 substituted
.push_back('\\');
3233 substituted
.push_back('\\');
3238 substituted
.push_back(text
[j
]);
3241 *length
= substituted
.length();
3242 return substituted
.c_str();
3245 #ifndef SCI_OWNREGEX
3247 RegexSearchBase
*Scintilla::CreateRegexSearch(CharClassify
*charClassTable
) {
3248 return new BuiltinRegex(charClassTable
);