Update Scintilla to version 3.6.3
[geany-mirror.git] / scintilla / src / Document.cxx
blob b0744a21cae017f706ea74c0bcf08a63d3ed7c66
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
14 #include <stdexcept>
15 #include <string>
16 #include <vector>
17 #include <algorithm>
19 #ifdef CXX11_REGEX
20 #include <regex>
21 #endif
23 #include "Platform.h"
25 #include "ILexer.h"
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
29 #include "Position.h"
30 #include "SplitVector.h"
31 #include "Partitioning.h"
32 #include "RunStyles.h"
33 #include "CellBuffer.h"
34 #include "PerLine.h"
35 #include "CharClassify.h"
36 #include "Decoration.h"
37 #include "CaseFolder.h"
38 #include "Document.h"
39 #include "RESearch.h"
40 #include "UniConversion.h"
41 #include "UnicodeFromUTF8.h"
43 #ifdef SCI_NAMESPACE
44 using namespace Scintilla;
45 #endif
47 static inline bool IsPunctuation(char ch) {
48 return IsASCII(ch) && ispunct(ch);
51 void LexInterface::Colourise(int start, int end) {
52 if (pdoc && instance && !performingStyle) {
53 // Protect against reentrance, which may occur, for example, when
54 // fold points are discovered while performing styling and the folding
55 // code looks for child lines which may trigger styling.
56 performingStyle = true;
58 int lengthDoc = pdoc->Length();
59 if (end == -1)
60 end = lengthDoc;
61 int len = end - start;
63 PLATFORM_ASSERT(len >= 0);
64 PLATFORM_ASSERT(start + len <= lengthDoc);
66 int styleStart = 0;
67 if (start > 0)
68 styleStart = pdoc->StyleAt(start - 1);
70 if (len > 0) {
71 instance->Lex(start, len, styleStart, pdoc);
72 instance->Fold(start, len, styleStart, pdoc);
75 performingStyle = false;
79 int LexInterface::LineEndTypesSupported() {
80 if (instance) {
81 int interfaceVersion = instance->Version();
82 if (interfaceVersion >= lvSubStyles) {
83 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
84 return ssinstance->LineEndTypesSupported();
87 return 0;
// Construct a document with default settings: zero reference count, no
// case folder, no regex searcher, no lexer interface, and freshly
// allocated per-line data stores.
90 Document::Document() {
91 refCount = 0;
92 pcf = NULL;
// Default end-of-line mode depends on the build target: CR+LF when _WIN32
// is defined, LF otherwise.
93 #ifdef _WIN32
94 eolMode = SC_EOL_CRLF;
95 #else
96 eolMode = SC_EOL_LF;
97 #endif
// A code page of 0 selects the single-byte paths used throughout this file.
98 dbcsCodePage = 0;
99 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
100 endStyled = 0;
101 styleClock = 0;
// Reentrancy counters; each starts at zero.
102 enteredModification = 0;
103 enteredStyling = 0;
104 enteredReadOnlyCount = 0;
105 insertionSet = false;
// Tab and indentation defaults.
106 tabInChars = 8;
107 indentInChars = 0;
108 actualIndentInChars = 8;
109 useTabs = true;
110 tabIndents = true;
111 backspaceUnindents = false;
112 durationStyleOneLine = 0.00001;
114 matchesValid = false;
115 regex = 0;
117 UTF8BytesOfLeadInitialise();
// One store per per-line data slot. Margin text and annotations both use
// LineAnnotation. These objects are deleted in the destructor.
119 perLineData[ldMarkers] = new LineMarkers();
120 perLineData[ldLevels] = new LineLevels();
121 perLineData[ldState] = new LineState();
122 perLineData[ldMargin] = new LineAnnotation();
123 perLineData[ldAnnotation] = new LineAnnotation();
// Register this document with the cell buffer as its per-line handler;
// InsertLine and RemoveLine below forward to the per-line stores.
125 cb.SetPerLine(this);
127 pli = 0;
130 Document::~Document() {
131 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
132 it->watcher->NotifyDeleted(this, it->userData);
134 for (int j=0; j<ldSize; j++) {
135 delete perLineData[j];
136 perLineData[j] = 0;
138 delete regex;
139 regex = 0;
140 delete pli;
141 pli = 0;
142 delete pcf;
143 pcf = 0;
146 void Document::Init() {
147 for (int j=0; j<ldSize; j++) {
148 if (perLineData[j])
149 perLineData[j]->Init();
153 int Document::LineEndTypesSupported() const {
154 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
155 return pli->LineEndTypesSupported();
156 else
157 return 0;
160 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
161 if (dbcsCodePage != dbcsCodePage_) {
162 dbcsCodePage = dbcsCodePage_;
163 SetCaseFolder(NULL);
164 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
165 return true;
166 } else {
167 return false;
171 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
172 if (lineEndBitSet != lineEndBitSet_) {
173 lineEndBitSet = lineEndBitSet_;
174 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
175 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
176 ModifiedAt(0);
177 cb.SetLineEndTypes(lineEndBitSetActive);
178 return true;
179 } else {
180 return false;
182 } else {
183 return false;
187 void Document::InsertLine(int line) {
188 for (int j=0; j<ldSize; j++) {
189 if (perLineData[j])
190 perLineData[j]->InsertLine(line);
194 void Document::RemoveLine(int line) {
195 for (int j=0; j<ldSize; j++) {
196 if (perLineData[j])
197 perLineData[j]->RemoveLine(line);
201 // Increase reference count and return its previous value.
202 int Document::AddRef() {
203 return refCount++;
206 // Decrease reference count and return its previous value.
207 // Delete the document if reference count reaches zero.
208 int SCI_METHOD Document::Release() {
209 int curRefCount = --refCount;
210 if (curRefCount == 0)
211 delete this;
212 return curRefCount;
215 void Document::SetSavePoint() {
216 cb.SetSavePoint();
217 NotifySavePoint(true);
// Undo every step recorded in the current tentative sequence.
// Does nothing unless TentativeActive() is true. For each step a "before"
// notification is sent, the step is undone through the cell buffer, and an
// "after" notification describing the resulting insertion or deletion is
// sent. cb.TentativeCommit() is called once the steps have been processed.
220 void Document::TentativeUndo() {
221 if (!TentativeActive())
222 return;
223 CheckReadOnly();
// Only proceed when not already inside a modification (reentrancy guard).
224 if (enteredModification == 0) {
225 enteredModification++;
226 if (!cb.IsReadOnly()) {
227 bool startSavePoint = cb.IsSavePoint();
228 bool multiLine = false;
229 int steps = cb.TentativeSteps();
230 //Platform::DebugPrintf("Steps=%d\n", steps);
231 for (int step = 0; step < steps; step++) {
232 const int prevLinesTotal = LinesTotal();
233 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal re-inserts text, a container
// action is reported with its token, anything else is reported as an
// upcoming deletion.
234 if (action.at == removeAction) {
235 NotifyModified(DocModification(
236 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
237 } else if (action.at == containerAction) {
238 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
239 dm.token = action.position;
240 NotifyModified(dm);
241 } else {
242 NotifyModified(DocModification(
243 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
245 cb.PerformUndoStep();
// Container actions do not change text, so styling is only invalidated
// for the other action kinds.
246 if (action.at != containerAction) {
247 ModifiedAt(action.position);
250 int modFlags = SC_PERFORMED_UNDO;
251 // With undo, an insertion action becomes a deletion notification
252 if (action.at == removeAction) {
253 modFlags |= SC_MOD_INSERTTEXT;
254 } else if (action.at == insertAction) {
255 modFlags |= SC_MOD_DELETETEXT;
257 if (steps > 1)
258 modFlags |= SC_MULTISTEPUNDOREDO;
// multiLine latches to true once any step changes the number of lines.
259 const int linesAdded = LinesTotal() - prevLinesTotal;
260 if (linesAdded != 0)
261 multiLine = true;
// The final step is flagged, along with whether any step was multi-line.
262 if (step == steps - 1) {
263 modFlags |= SC_LASTSTEPINUNDOREDO;
264 if (multiLine)
265 modFlags |= SC_MULTILINEUNDOREDO;
267 NotifyModified(DocModification(modFlags, action.position, action.lenData,
268 linesAdded, action.data));
// Report a save point transition if the undo crossed one.
271 bool endSavePoint = cb.IsSavePoint();
272 if (startSavePoint != endSavePoint)
273 NotifySavePoint(endSavePoint);
275 cb.TentativeCommit();
277 enteredModification--;
281 int Document::GetMark(int line) {
282 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
285 int Document::MarkerNext(int lineStart, int mask) const {
286 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
289 int Document::AddMark(int line, int markerNum) {
290 if (line >= 0 && line <= LinesTotal()) {
291 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
292 AddMark(line, markerNum, LinesTotal());
293 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
294 NotifyModified(mh);
295 return prev;
296 } else {
297 return 0;
301 void Document::AddMarkSet(int line, int valueSet) {
302 if (line < 0 || line > LinesTotal()) {
303 return;
305 unsigned int m = valueSet;
306 for (int i = 0; m; i++, m >>= 1)
307 if (m & 1)
308 static_cast<LineMarkers *>(perLineData[ldMarkers])->
309 AddMark(line, i, LinesTotal());
310 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
311 NotifyModified(mh);
314 void Document::DeleteMark(int line, int markerNum) {
315 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
316 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
317 NotifyModified(mh);
320 void Document::DeleteMarkFromHandle(int markerHandle) {
321 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
322 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
323 mh.line = -1;
324 NotifyModified(mh);
327 void Document::DeleteAllMarks(int markerNum) {
328 bool someChanges = false;
329 for (int line = 0; line < LinesTotal(); line++) {
330 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
331 someChanges = true;
333 if (someChanges) {
334 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
335 mh.line = -1;
336 NotifyModified(mh);
340 int Document::LineFromHandle(int markerHandle) {
341 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
344 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
345 return cb.LineStart(line);
348 bool Document::IsLineStartPosition(int position) const {
349 return LineStart(LineFromPosition(position)) == position;
352 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
353 if (line >= LinesTotal() - 1) {
354 return LineStart(line + 1);
355 } else {
356 int position = LineStart(line + 1);
357 if (SC_CP_UTF8 == dbcsCodePage) {
358 unsigned char bytes[] = {
359 static_cast<unsigned char>(cb.CharAt(position-3)),
360 static_cast<unsigned char>(cb.CharAt(position-2)),
361 static_cast<unsigned char>(cb.CharAt(position-1)),
363 if (UTF8IsSeparator(bytes)) {
364 return position - UTF8SeparatorLength;
366 if (UTF8IsNEL(bytes+1)) {
367 return position - UTF8NELLength;
370 position--; // Back over CR or LF
371 // When line terminator is CR+LF, may need to go back one more
372 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
373 position--;
375 return position;
379 void SCI_METHOD Document::SetErrorStatus(int status) {
380 // Tell the watchers an error has occurred.
381 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
382 it->watcher->NotifyErrorOccurred(this, it->userData, status);
386 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
387 return cb.LineFromPosition(pos);
390 int Document::LineEndPosition(int position) const {
391 return LineEnd(LineFromPosition(position));
394 bool Document::IsLineEndPosition(int position) const {
395 return LineEnd(LineFromPosition(position)) == position;
398 bool Document::IsPositionInLineEnd(int position) const {
399 return position >= LineEnd(LineFromPosition(position));
402 int Document::VCHomePosition(int position) const {
403 int line = LineFromPosition(position);
404 int startPosition = LineStart(line);
405 int endLine = LineEnd(line);
406 int startText = startPosition;
407 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
408 startText++;
409 if (position == startText)
410 return startPosition;
411 else
412 return startText;
415 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
416 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
417 if (prev != level) {
418 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
419 LineStart(line), 0, 0, 0, line);
420 mh.foldLevelNow = level;
421 mh.foldLevelPrev = prev;
422 NotifyModified(mh);
424 return prev;
427 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
428 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
431 void Document::ClearLevels() {
432 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
435 static bool IsSubordinate(int levelStart, int levelTry) {
436 if (levelTry & SC_FOLDLEVELWHITEFLAG)
437 return true;
438 else
439 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
442 int Document::GetLastChild(int lineParent, int level, int lastLine) {
443 if (level == -1)
444 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
445 int maxLine = LinesTotal();
446 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
447 int lineMaxSubord = lineParent;
448 while (lineMaxSubord < maxLine - 1) {
449 EnsureStyledTo(LineStart(lineMaxSubord + 2));
450 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
451 break;
452 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
453 break;
454 lineMaxSubord++;
456 if (lineMaxSubord > lineParent) {
457 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
458 // Have chewed up some whitespace that belongs to a parent so seek back
459 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
460 lineMaxSubord--;
464 return lineMaxSubord;
467 int Document::GetFoldParent(int line) const {
468 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
469 int lineLook = line - 1;
470 while ((lineLook > 0) && (
471 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
472 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
474 lineLook--;
476 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
477 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
478 return lineLook;
479 } else {
480 return -1;
// Fill highlightDelimiter with the fold block around `line`:
// beginFoldBlock, endFoldBlock, firstChangeableLineBefore and
// firstChangeableLineAfter. When no enclosing fold block can be found the
// structure is cleared instead.
// lastLine is used (together with line) to bound the GetLastChild searches.
484 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
485 int level = GetLevel(line);
486 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
488 int lookLine = line;
489 int lookLineLevel = level;
490 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// Move upwards while the current line is flagged as whitespace, or is a
// header whose level number is not below that of the line after it.
491 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
492 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
493 lookLineLevel = GetLevel(--lookLine);
494 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// The block starts at this line if it is a header, otherwise at its fold
// parent; with no parent there is nothing to highlight.
497 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
498 if (beginFoldBlock == -1) {
499 highlightDelimiter.Clear();
500 return;
503 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
504 int firstChangeableLineBefore = -1;
// If the block found ends before `line`, scan earlier lines for a header
// whose last child is exactly `line` and use that block instead.
505 if (endFoldBlock < line) {
506 lookLine = beginFoldBlock - 1;
507 lookLineLevel = GetLevel(lookLine);
508 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
509 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
510 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
511 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
512 beginFoldBlock = lookLine;
513 endFoldBlock = line;
514 firstChangeableLineBefore = line - 1;
// Stop at a base-level line whose predecessor is at a deeper level.
517 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
518 break;
519 lookLineLevel = GetLevel(--lookLine);
520 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// Otherwise search backwards from line - 1 down to beginFoldBlock for a
// whitespace line or a line at a deeper level number than `line`.
523 if (firstChangeableLineBefore == -1) {
524 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
525 lookLine >= beginFoldBlock;
526 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
527 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
528 firstChangeableLineBefore = lookLine;
529 break;
// Fallback: the line just before the block.
533 if (firstChangeableLineBefore == -1)
534 firstChangeableLineBefore = beginFoldBlock - 1;
// Search forwards from line + 1 up to endFoldBlock for a header whose level
// number is below that of the line following it.
536 int firstChangeableLineAfter = -1;
537 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
538 lookLine <= endFoldBlock;
539 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
540 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
541 firstChangeableLineAfter = lookLine;
542 break;
// Fallback: the line just after the block.
545 if (firstChangeableLineAfter == -1)
546 firstChangeableLineAfter = endFoldBlock + 1;
548 highlightDelimiter.beginFoldBlock = beginFoldBlock;
549 highlightDelimiter.endFoldBlock = endFoldBlock;
550 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
551 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
554 int Document::ClampPositionIntoDocument(int pos) const {
555 return Platform::Clamp(pos, 0, Length());
558 bool Document::IsCrLf(int pos) const {
559 if (pos < 0)
560 return false;
561 if (pos >= (Length() - 1))
562 return false;
563 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
566 int Document::LenChar(int pos) {
567 if (pos < 0) {
568 return 1;
569 } else if (IsCrLf(pos)) {
570 return 2;
571 } else if (SC_CP_UTF8 == dbcsCodePage) {
572 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
573 const int widthCharBytes = UTF8BytesOfLead[leadByte];
574 int lengthDoc = Length();
575 if ((pos + widthCharBytes) > lengthDoc)
576 return lengthDoc - pos;
577 else
578 return widthCharBytes;
579 } else if (dbcsCodePage) {
580 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
581 } else {
582 return 1;
586 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
587 int trail = pos;
588 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
589 trail--;
590 start = (trail > 0) ? trail-1 : trail;
592 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
593 const int widthCharBytes = UTF8BytesOfLead[leadByte];
594 if (widthCharBytes == 1) {
595 return false;
596 } else {
597 int trailBytes = widthCharBytes - 1;
598 int len = pos - start;
599 if (len > trailBytes)
600 // pos too far from lead
601 return false;
602 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
603 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
604 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
605 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
606 if (utf8status & UTF8MaskInvalid)
607 return false;
608 end = start + widthCharBytes;
609 return true;
613 // Normalise a position so that it is not halfway through a two byte character.
614 // This can occur in two situations -
615 // When lines are terminated with \r\n pairs which should be treated as one character.
616 // When displaying DBCS text such as Japanese.
617 // If moving, move the position in the indicated direction.
618 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
619 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
620 // If out of range, just return minimum/maximum value.
621 if (pos <= 0)
622 return 0;
623 if (pos >= Length())
624 return Length();
626 // PLATFORM_ASSERT(pos > 0 && pos < Length());
627 if (checkLineEnd && IsCrLf(pos - 1)) {
628 if (moveDir > 0)
629 return pos + 1;
630 else
631 return pos - 1;
634 if (dbcsCodePage) {
635 if (SC_CP_UTF8 == dbcsCodePage) {
636 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
637 // If ch is not a trail byte then pos is valid intercharacter position
638 if (UTF8IsTrailByte(ch)) {
639 int startUTF = pos;
640 int endUTF = pos;
641 if (InGoodUTF8(pos, startUTF, endUTF)) {
642 // ch is a trail byte within a UTF-8 character
643 if (moveDir > 0)
644 pos = endUTF;
645 else
646 pos = startUTF;
648 // Else invalid UTF-8 so return position of isolated trail byte
650 } else {
651 // Anchor DBCS calculations at start of line because start of line can
652 // not be a DBCS trail byte.
653 int posStartLine = LineStart(LineFromPosition(pos));
654 if (pos == posStartLine)
655 return pos;
657 // Step back until a non-lead-byte is found.
658 int posCheck = pos;
659 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
660 posCheck--;
662 // Check from known start of character.
663 while (posCheck < pos) {
664 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
665 if (posCheck + mbsize == pos) {
666 return pos;
667 } else if (posCheck + mbsize > pos) {
668 if (moveDir > 0) {
669 return posCheck + mbsize;
670 } else {
671 return posCheck;
674 posCheck += mbsize;
679 return pos;
682 // NextPosition moves between valid positions - it can not handle a position in the middle of a
683 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
684 // A \r\n pair is treated as two characters.
// moveDir > 0 steps forward one character; any other value steps backward.
// The result is 0 or Length() when a one-byte step would reach or pass
// either end of the document.
685 int Document::NextPosition(int pos, int moveDir) const {
686 // If out of range, just return minimum/maximum value.
687 int increment = (moveDir > 0) ? 1 : -1;
688 if (pos + increment <= 0)
689 return 0;
690 if (pos + increment >= Length())
691 return Length();
693 if (dbcsCodePage) {
694 if (SC_CP_UTF8 == dbcsCodePage) {
695 if (increment == 1) {
696 // Simple forward movement case so can avoid some checks
697 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
698 if (UTF8IsAscii(leadByte)) {
699 // Single byte character or invalid
700 pos++;
701 } else {
// Collect the bytes the lead byte claims and classify them; an invalid
// sequence advances by a single byte, a valid one by its full width.
702 const int widthCharBytes = UTF8BytesOfLead[leadByte];
703 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
704 for (int b=1; b<widthCharBytes; b++)
705 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
706 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
707 if (utf8status & UTF8MaskInvalid)
708 pos++;
709 else
710 pos += utf8status & UTF8MaskWidth;
712 } else {
713 // Examine byte before position
714 pos--;
715 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
716 // If ch is not a trail byte then pos is valid intercharacter position
717 if (UTF8IsTrailByte(ch)) {
718 // If ch is a trail byte in a valid UTF-8 character then return start of character
719 int startUTF = pos;
720 int endUTF = pos;
721 if (InGoodUTF8(pos, startUTF, endUTF)) {
722 pos = startUTF;
724 // Else invalid UTF-8 so return position of isolated trail byte
727 } else {
// DBCS code pages: forward movement skips one or two bytes depending on
// whether the byte at pos is a lead byte, clamped to the document length.
728 if (moveDir > 0) {
729 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
730 pos += mbsize;
731 if (pos > Length())
732 pos = Length();
733 } else {
734 // Anchor DBCS calculations at start of line because start of line can
735 // not be a DBCS trail byte.
736 int posStartLine = LineStart(LineFromPosition(pos));
737 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
738 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
739 if ((pos - 1) <= posStartLine) {
740 return pos - 1;
741 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
742 // Must actually be trail byte
743 return pos - 2;
744 } else {
745 // Otherwise, step back until a non-lead-byte is found.
746 int posTemp = pos - 1;
// NOTE(review): in this excerpt no loop body follows the while below; a
// line holding only ';' appears to have been lost in extraction — confirm
// against the upstream file.
747 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
749 // Now posTemp+1 must point to the beginning of a character,
750 // so figure out whether we went back an even or an odd
751 // number of bytes and go back 1 or 2 bytes, respectively.
752 return (pos - 1 - ((pos - posTemp) & 1));
756 } else {
// Single-byte encodings: every byte is a character.
757 pos += increment;
760 return pos;
763 bool Document::NextCharacter(int &pos, int moveDir) const {
764 // Returns true if pos changed
765 int posNext = NextPosition(pos, moveDir);
766 if (posNext == pos) {
767 return false;
768 } else {
769 pos = posNext;
770 return true;
774 // Return -1 on out-of-bounds
775 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
776 int pos = positionStart;
777 if (dbcsCodePage) {
778 const int increment = (characterOffset > 0) ? 1 : -1;
779 while (characterOffset != 0) {
780 const int posNext = NextPosition(pos, increment);
781 if (posNext == pos)
782 return INVALID_POSITION;
783 pos = posNext;
784 characterOffset -= increment;
786 } else {
787 pos = positionStart + characterOffset;
788 if ((pos < 0) || (pos > Length()))
789 return INVALID_POSITION;
791 return pos;
794 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
795 int pos = positionStart;
796 if (dbcsCodePage) {
797 const int increment = (characterOffset > 0) ? 1 : -1;
798 while (characterOffset != 0) {
799 const int posNext = NextPosition(pos, increment);
800 if (posNext == pos)
801 return INVALID_POSITION;
802 if (abs(pos-posNext) > 3) // 4 byte character = 2*UTF16.
803 characterOffset -= increment;
804 pos = posNext;
805 characterOffset -= increment;
807 } else {
808 pos = positionStart + characterOffset;
809 if ((pos < 0) || (pos > Length()))
810 return INVALID_POSITION;
812 return pos;
815 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
816 int character;
817 int bytesInCharacter = 1;
818 if (dbcsCodePage) {
819 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
820 if (SC_CP_UTF8 == dbcsCodePage) {
821 if (UTF8IsAscii(leadByte)) {
822 // Single byte character or invalid
823 character = leadByte;
824 } else {
825 const int widthCharBytes = UTF8BytesOfLead[leadByte];
826 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
827 for (int b=1; b<widthCharBytes; b++)
828 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
829 int utf8status = UTF8Classify(charBytes, widthCharBytes);
830 if (utf8status & UTF8MaskInvalid) {
831 // Report as singleton surrogate values which are invalid Unicode
832 character = 0xDC80 + leadByte;
833 } else {
834 bytesInCharacter = utf8status & UTF8MaskWidth;
835 character = UnicodeFromUTF8(charBytes);
838 } else {
839 if (IsDBCSLeadByte(leadByte)) {
840 bytesInCharacter = 2;
841 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
842 } else {
843 character = leadByte;
846 } else {
847 character = cb.CharAt(position);
849 if (pWidth) {
850 *pWidth = bytesInCharacter;
852 return character;
855 int SCI_METHOD Document::CodePage() const {
856 return dbcsCodePage;
859 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
860 // Byte ranges found in Wikipedia articles with relevant search strings in each case
861 unsigned char uch = static_cast<unsigned char>(ch);
862 switch (dbcsCodePage) {
863 case 932:
864 // Shift_jis
865 return ((uch >= 0x81) && (uch <= 0x9F)) ||
866 ((uch >= 0xE0) && (uch <= 0xFC));
867 // Lead bytes F0 to FC may be a Microsoft addition.
868 case 936:
869 // GBK
870 return (uch >= 0x81) && (uch <= 0xFE);
871 case 949:
872 // Korean Wansung KS C-5601-1987
873 return (uch >= 0x81) && (uch <= 0xFE);
874 case 950:
875 // Big5
876 return (uch >= 0x81) && (uch <= 0xFE);
877 case 1361:
878 // Korean Johab KS C-5601-1992
879 return
880 ((uch >= 0x84) && (uch <= 0xD3)) ||
881 ((uch >= 0xD8) && (uch <= 0xDE)) ||
882 ((uch >= 0xE0) && (uch <= 0xF9));
884 return false;
// True for a space or a horizontal tab, false for every other value.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
891 // Need to break text into segments near lengthSegment but taking into
892 // account the encoding to not break inside a UTF-8 or DBCS character
893 // and also trying to avoid breaking inside a pair of combining characters.
894 // The segment length must always be long enough (more than 4 bytes)
895 // so that there will be at least one whole character to make a segment.
896 // For UTF-8, text must consist only of valid whole characters.
897 // In preference order from best to worst:
898 // 1) Break after space
899 // 2) Break before punctuation
900 // 3) Break after whole character
902 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
903 if (length <= lengthSegment)
904 return length;
905 int lastSpaceBreak = -1;
906 int lastPunctuationBreak = -1;
907 int lastEncodingAllowedBreak = 0;
908 for (int j=0; j < lengthSegment;) {
909 unsigned char ch = static_cast<unsigned char>(text[j]);
910 if (j > 0) {
911 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
912 lastSpaceBreak = j;
914 if (ch < 'A') {
915 lastPunctuationBreak = j;
918 lastEncodingAllowedBreak = j;
920 if (dbcsCodePage == SC_CP_UTF8) {
921 j += UTF8BytesOfLead[ch];
922 } else if (dbcsCodePage) {
923 j += IsDBCSLeadByte(ch) ? 2 : 1;
924 } else {
925 j++;
928 if (lastSpaceBreak >= 0) {
929 return lastSpaceBreak;
930 } else if (lastPunctuationBreak >= 0) {
931 return lastPunctuationBreak;
933 return lastEncodingAllowedBreak;
936 EncodingFamily Document::CodePageFamily() const {
937 if (SC_CP_UTF8 == dbcsCodePage)
938 return efUnicode;
939 else if (dbcsCodePage)
940 return efDBCS;
941 else
942 return efEightBit;
945 void Document::ModifiedAt(int pos) {
946 if (endStyled > pos)
947 endStyled = pos;
950 void Document::CheckReadOnly() {
951 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
952 enteredReadOnlyCount++;
953 NotifyModifyAttempt();
954 enteredReadOnlyCount--;
958 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
959 // SetStyleAt does not change the persistent state of a document
961 bool Document::DeleteChars(int pos, int len) {
962 if (pos < 0)
963 return false;
964 if (len <= 0)
965 return false;
966 if ((pos + len) > Length())
967 return false;
968 CheckReadOnly();
969 if (enteredModification != 0) {
970 return false;
971 } else {
972 enteredModification++;
973 if (!cb.IsReadOnly()) {
974 NotifyModified(
975 DocModification(
976 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
977 pos, len,
978 0, 0));
979 int prevLinesTotal = LinesTotal();
980 bool startSavePoint = cb.IsSavePoint();
981 bool startSequence = false;
982 const char *text = cb.DeleteChars(pos, len, startSequence);
983 if (startSavePoint && cb.IsCollectingUndo())
984 NotifySavePoint(!startSavePoint);
985 if ((pos < Length()) || (pos == 0))
986 ModifiedAt(pos);
987 else
988 ModifiedAt(pos-1);
989 NotifyModified(
990 DocModification(
991 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
992 pos, len,
993 LinesTotal() - prevLinesTotal, text));
995 enteredModification--;
997 return !cb.IsReadOnly();
1001 * Insert a string with a length.
1003 int Document::InsertString(int position, const char *s, int insertLength) {
1004 if (insertLength <= 0) {
1005 return 0;
1007 CheckReadOnly(); // Application may change read only state here
1008 if (cb.IsReadOnly()) {
1009 return 0;
1011 if (enteredModification != 0) {
1012 return 0;
1014 enteredModification++;
1015 insertionSet = false;
1016 insertion.clear();
1017 NotifyModified(
1018 DocModification(
1019 SC_MOD_INSERTCHECK,
1020 position, insertLength,
1021 0, s));
1022 if (insertionSet) {
1023 s = insertion.c_str();
1024 insertLength = static_cast<int>(insertion.length());
1026 NotifyModified(
1027 DocModification(
1028 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1029 position, insertLength,
1030 0, s));
1031 int prevLinesTotal = LinesTotal();
1032 bool startSavePoint = cb.IsSavePoint();
1033 bool startSequence = false;
1034 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1035 if (startSavePoint && cb.IsCollectingUndo())
1036 NotifySavePoint(!startSavePoint);
1037 ModifiedAt(position);
1038 NotifyModified(
1039 DocModification(
1040 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1041 position, insertLength,
1042 LinesTotal() - prevLinesTotal, text));
1043 if (insertionSet) { // Free memory as could be large
1044 std::string().swap(insertion);
1046 enteredModification--;
1047 return insertLength;
1050 void Document::ChangeInsertion(const char *s, int length) {
1051 insertionSet = true;
1052 insertion.assign(s, length);
1055 int SCI_METHOD Document::AddData(char *data, Sci_Position length) {
1056 try {
1057 int position = Length();
1058 InsertString(position, data, length);
1059 } catch (std::bad_alloc &) {
1060 return SC_STATUS_BADALLOC;
1061 } catch (...) {
1062 return SC_STATUS_FAILURE;
1064 return 0;
1067 void * SCI_METHOD Document::ConvertToDocument() {
1068 return this;
1071 int Document::Undo() {
1072 int newPos = -1;
1073 CheckReadOnly();
1074 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1075 enteredModification++;
1076 if (!cb.IsReadOnly()) {
1077 bool startSavePoint = cb.IsSavePoint();
1078 bool multiLine = false;
1079 int steps = cb.StartUndo();
1080 //Platform::DebugPrintf("Steps=%d\n", steps);
1081 int coalescedRemovePos = -1;
1082 int coalescedRemoveLen = 0;
1083 int prevRemoveActionPos = -1;
1084 int prevRemoveActionLen = 0;
1085 for (int step = 0; step < steps; step++) {
1086 const int prevLinesTotal = LinesTotal();
1087 const Action &action = cb.GetUndoStep();
1088 if (action.at == removeAction) {
1089 NotifyModified(DocModification(
1090 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1091 } else if (action.at == containerAction) {
1092 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1093 dm.token = action.position;
1094 NotifyModified(dm);
1095 if (!action.mayCoalesce) {
1096 coalescedRemovePos = -1;
1097 coalescedRemoveLen = 0;
1098 prevRemoveActionPos = -1;
1099 prevRemoveActionLen = 0;
1101 } else {
1102 NotifyModified(DocModification(
1103 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1105 cb.PerformUndoStep();
1106 if (action.at != containerAction) {
1107 ModifiedAt(action.position);
1108 newPos = action.position;
1111 int modFlags = SC_PERFORMED_UNDO;
1112 // With undo, an insertion action becomes a deletion notification
1113 if (action.at == removeAction) {
1114 newPos += action.lenData;
1115 modFlags |= SC_MOD_INSERTTEXT;
1116 if ((coalescedRemoveLen > 0) &&
1117 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1118 coalescedRemoveLen += action.lenData;
1119 newPos = coalescedRemovePos + coalescedRemoveLen;
1120 } else {
1121 coalescedRemovePos = action.position;
1122 coalescedRemoveLen = action.lenData;
1124 prevRemoveActionPos = action.position;
1125 prevRemoveActionLen = action.lenData;
1126 } else if (action.at == insertAction) {
1127 modFlags |= SC_MOD_DELETETEXT;
1128 coalescedRemovePos = -1;
1129 coalescedRemoveLen = 0;
1130 prevRemoveActionPos = -1;
1131 prevRemoveActionLen = 0;
1133 if (steps > 1)
1134 modFlags |= SC_MULTISTEPUNDOREDO;
1135 const int linesAdded = LinesTotal() - prevLinesTotal;
1136 if (linesAdded != 0)
1137 multiLine = true;
1138 if (step == steps - 1) {
1139 modFlags |= SC_LASTSTEPINUNDOREDO;
1140 if (multiLine)
1141 modFlags |= SC_MULTILINEUNDOREDO;
1143 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1144 linesAdded, action.data));
1147 bool endSavePoint = cb.IsSavePoint();
1148 if (startSavePoint != endSavePoint)
1149 NotifySavePoint(endSavePoint);
1151 enteredModification--;
1153 return newPos;
1156 int Document::Redo() {
1157 int newPos = -1;
1158 CheckReadOnly();
1159 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1160 enteredModification++;
1161 if (!cb.IsReadOnly()) {
1162 bool startSavePoint = cb.IsSavePoint();
1163 bool multiLine = false;
1164 int steps = cb.StartRedo();
1165 for (int step = 0; step < steps; step++) {
1166 const int prevLinesTotal = LinesTotal();
1167 const Action &action = cb.GetRedoStep();
1168 if (action.at == insertAction) {
1169 NotifyModified(DocModification(
1170 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1171 } else if (action.at == containerAction) {
1172 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1173 dm.token = action.position;
1174 NotifyModified(dm);
1175 } else {
1176 NotifyModified(DocModification(
1177 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1179 cb.PerformRedoStep();
1180 if (action.at != containerAction) {
1181 ModifiedAt(action.position);
1182 newPos = action.position;
1185 int modFlags = SC_PERFORMED_REDO;
1186 if (action.at == insertAction) {
1187 newPos += action.lenData;
1188 modFlags |= SC_MOD_INSERTTEXT;
1189 } else if (action.at == removeAction) {
1190 modFlags |= SC_MOD_DELETETEXT;
1192 if (steps > 1)
1193 modFlags |= SC_MULTISTEPUNDOREDO;
1194 const int linesAdded = LinesTotal() - prevLinesTotal;
1195 if (linesAdded != 0)
1196 multiLine = true;
1197 if (step == steps - 1) {
1198 modFlags |= SC_LASTSTEPINUNDOREDO;
1199 if (multiLine)
1200 modFlags |= SC_MULTILINEUNDOREDO;
1202 NotifyModified(
1203 DocModification(modFlags, action.position, action.lenData,
1204 linesAdded, action.data));
1207 bool endSavePoint = cb.IsSavePoint();
1208 if (startSavePoint != endSavePoint)
1209 NotifySavePoint(endSavePoint);
1211 enteredModification--;
1213 return newPos;
1216 void Document::DelChar(int pos) {
1217 DeleteChars(pos, LenChar(pos));
1220 void Document::DelCharBack(int pos) {
1221 if (pos <= 0) {
1222 return;
1223 } else if (IsCrLf(pos - 2)) {
1224 DeleteChars(pos - 2, 2);
1225 } else if (dbcsCodePage) {
1226 int startChar = NextPosition(pos, -1);
1227 DeleteChars(startChar, pos - startChar);
1228 } else {
1229 DeleteChars(pos - 1, 1);
/**
 * Return the column of the first tab stop strictly after column pos,
 * with tab stops placed every tabSize columns.
 * A tabSize below 1 is treated as 1 so that the division can never be by
 * zero; for positive tabSize the result is unchanged.
 */
static int NextTab(int pos, int tabSize) {
	if (tabSize < 1)
		tabSize = 1;
	return ((pos / tabSize) + 1) * tabSize;
}
/**
 * Build the white space needed to indent to column indent.
 * When insertSpaces is false, as many tabs as fit (one per tabSize columns)
 * are emitted first and the remainder is filled with spaces; otherwise only
 * spaces are used. A non-positive indent yields an empty string.
 * A non-positive tabSize can not be satisfied with tabs, so only spaces are
 * produced in that case instead of looping without end.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<size_t>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<size_t>(indent), ' ');
	return indentation;
}
1252 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1253 int indent = 0;
1254 if ((line >= 0) && (line < LinesTotal())) {
1255 int lineStart = LineStart(line);
1256 int length = Length();
1257 for (int i = lineStart; i < length; i++) {
1258 char ch = cb.CharAt(i);
1259 if (ch == ' ')
1260 indent++;
1261 else if (ch == '\t')
1262 indent = NextTab(indent, tabInChars);
1263 else
1264 return indent;
1267 return indent;
1270 int Document::SetLineIndentation(int line, int indent) {
1271 int indentOfLine = GetLineIndentation(line);
1272 if (indent < 0)
1273 indent = 0;
1274 if (indent != indentOfLine) {
1275 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1276 int thisLineStart = LineStart(line);
1277 int indentPos = GetLineIndentPosition(line);
1278 UndoGroup ug(this);
1279 DeleteChars(thisLineStart, indentPos - thisLineStart);
1280 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1281 static_cast<int>(linebuf.length()));
1282 } else {
1283 return GetLineIndentPosition(line);
1287 int Document::GetLineIndentPosition(int line) const {
1288 if (line < 0)
1289 return 0;
1290 int pos = LineStart(line);
1291 int length = Length();
1292 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1293 pos++;
1295 return pos;
1298 int Document::GetColumn(int pos) {
1299 int column = 0;
1300 int line = LineFromPosition(pos);
1301 if ((line >= 0) && (line < LinesTotal())) {
1302 for (int i = LineStart(line); i < pos;) {
1303 char ch = cb.CharAt(i);
1304 if (ch == '\t') {
1305 column = NextTab(column, tabInChars);
1306 i++;
1307 } else if (ch == '\r') {
1308 return column;
1309 } else if (ch == '\n') {
1310 return column;
1311 } else if (i >= Length()) {
1312 return column;
1313 } else {
1314 column++;
1315 i = NextPosition(i, 1);
1319 return column;
1322 int Document::CountCharacters(int startPos, int endPos) const {
1323 startPos = MovePositionOutsideChar(startPos, 1, false);
1324 endPos = MovePositionOutsideChar(endPos, -1, false);
1325 int count = 0;
1326 int i = startPos;
1327 while (i < endPos) {
1328 count++;
1329 i = NextPosition(i, 1);
1331 return count;
1334 int Document::CountUTF16(int startPos, int endPos) const {
1335 startPos = MovePositionOutsideChar(startPos, 1, false);
1336 endPos = MovePositionOutsideChar(endPos, -1, false);
1337 int count = 0;
1338 int i = startPos;
1339 while (i < endPos) {
1340 count++;
1341 const int next = NextPosition(i, 1);
1342 if ((next - i) > 3)
1343 count++;
1344 i = next;
1346 return count;
1349 int Document::FindColumn(int line, int column) {
1350 int position = LineStart(line);
1351 if ((line >= 0) && (line < LinesTotal())) {
1352 int columnCurrent = 0;
1353 while ((columnCurrent < column) && (position < Length())) {
1354 char ch = cb.CharAt(position);
1355 if (ch == '\t') {
1356 columnCurrent = NextTab(columnCurrent, tabInChars);
1357 if (columnCurrent > column)
1358 return position;
1359 position++;
1360 } else if (ch == '\r') {
1361 return position;
1362 } else if (ch == '\n') {
1363 return position;
1364 } else {
1365 columnCurrent++;
1366 position = NextPosition(position, 1);
1370 return position;
1373 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1374 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1375 for (int line = lineBottom; line >= lineTop; line--) {
1376 int indentOfLine = GetLineIndentation(line);
1377 if (forwards) {
1378 if (LineStart(line) < LineEnd(line)) {
1379 SetLineIndentation(line, indentOfLine + IndentSize());
1381 } else {
1382 SetLineIndentation(line, indentOfLine - IndentSize());
1387 // Convert line endings for a piece of text to a particular mode.
1388 // Stop at len or when a NUL is found.
1389 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1390 std::string dest;
1391 for (size_t i = 0; (i < len) && (s[i]); i++) {
1392 if (s[i] == '\n' || s[i] == '\r') {
1393 if (eolModeWanted == SC_EOL_CR) {
1394 dest.push_back('\r');
1395 } else if (eolModeWanted == SC_EOL_LF) {
1396 dest.push_back('\n');
1397 } else { // eolModeWanted == SC_EOL_CRLF
1398 dest.push_back('\r');
1399 dest.push_back('\n');
1401 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1402 i++;
1404 } else {
1405 dest.push_back(s[i]);
1408 return dest;
1411 void Document::ConvertLineEnds(int eolModeSet) {
1412 UndoGroup ug(this);
1414 for (int pos = 0; pos < Length(); pos++) {
1415 if (cb.CharAt(pos) == '\r') {
1416 if (cb.CharAt(pos + 1) == '\n') {
1417 // CRLF
1418 if (eolModeSet == SC_EOL_CR) {
1419 DeleteChars(pos + 1, 1); // Delete the LF
1420 } else if (eolModeSet == SC_EOL_LF) {
1421 DeleteChars(pos, 1); // Delete the CR
1422 } else {
1423 pos++;
1425 } else {
1426 // CR
1427 if (eolModeSet == SC_EOL_CRLF) {
1428 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1429 } else if (eolModeSet == SC_EOL_LF) {
1430 pos += InsertString(pos, "\n", 1); // Insert LF
1431 DeleteChars(pos, 1); // Delete CR
1432 pos--;
1435 } else if (cb.CharAt(pos) == '\n') {
1436 // LF
1437 if (eolModeSet == SC_EOL_CRLF) {
1438 pos += InsertString(pos, "\r", 1); // Insert CR
1439 } else if (eolModeSet == SC_EOL_CR) {
1440 pos += InsertString(pos, "\r", 1); // Insert CR
1441 DeleteChars(pos, 1); // Delete LF
1442 pos--;
1449 bool Document::IsWhiteLine(int line) const {
1450 int currentChar = LineStart(line);
1451 int endLine = LineEnd(line);
1452 while (currentChar < endLine) {
1453 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1454 return false;
1456 ++currentChar;
1458 return true;
1461 int Document::ParaUp(int pos) const {
1462 int line = LineFromPosition(pos);
1463 line--;
1464 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1465 line--;
1467 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1468 line--;
1470 line++;
1471 return LineStart(line);
1474 int Document::ParaDown(int pos) const {
1475 int line = LineFromPosition(pos);
1476 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1477 line++;
1479 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1480 line++;
1482 if (line < LinesTotal())
1483 return LineStart(line);
1484 else // end of a document
1485 return LineEnd(line-1);
1488 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1489 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1490 return CharClassify::ccWord;
1491 return charClass.GetClass(ch);
1495 * Used by commmands that want to select whole words.
1496 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1498 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1499 CharClassify::cc ccStart = CharClassify::ccWord;
1500 if (delta < 0) {
1501 if (!onlyWordCharacters)
1502 ccStart = WordCharClass(cb.CharAt(pos-1));
1503 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1504 pos--;
1505 } else {
1506 if (!onlyWordCharacters && pos < Length())
1507 ccStart = WordCharClass(cb.CharAt(pos));
1508 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1509 pos++;
1511 return MovePositionOutsideChar(pos, delta, true);
1515 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1516 * (delta < 0).
1517 * This is looking for a transition between character classes although there is also some
1518 * additional movement to transit white space.
1519 * Used by cursor movement by word commands.
1521 int Document::NextWordStart(int pos, int delta) {
1522 if (delta < 0) {
1523 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1524 pos--;
1525 if (pos > 0) {
1526 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1527 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1528 pos--;
1531 } else {
1532 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1533 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1534 pos++;
1535 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1536 pos++;
1538 return pos;
1542 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1543 * (delta < 0).
1544 * This is looking for a transition between character classes although there is also some
1545 * additional movement to transit white space.
1546 * Used by cursor movement by word commands.
1548 int Document::NextWordEnd(int pos, int delta) {
1549 if (delta < 0) {
1550 if (pos > 0) {
1551 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1552 if (ccStart != CharClassify::ccSpace) {
1553 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1554 pos--;
1557 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1558 pos--;
1561 } else {
1562 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1563 pos++;
1565 if (pos < Length()) {
1566 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1567 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1568 pos++;
1572 return pos;
1576 * Check that the character at the given position is a word or punctuation character and that
1577 * the previous character is of a different character class.
1579 bool Document::IsWordStartAt(int pos) const {
1580 if (pos > 0) {
1581 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1582 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1583 (ccPos != WordCharClass(CharAt(pos - 1)));
1585 return true;
1589 * Check that the character at the given position is a word or punctuation character and that
1590 * the next character is of a different character class.
1592 bool Document::IsWordEndAt(int pos) const {
1593 if (pos < Length()) {
1594 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1595 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1596 (ccPrev != WordCharClass(CharAt(pos)));
1598 return true;
1602 * Check that the given range is has transitions between character classes at both
1603 * ends and where the characters on the inside are word or punctuation characters.
1605 bool Document::IsWordAt(int start, int end) const {
1606 return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1609 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1610 return (!word && !wordStart) ||
1611 (word && IsWordAt(pos, pos + length)) ||
1612 (wordStart && IsWordStartAt(pos));
1615 bool Document::HasCaseFolder(void) const {
1616 return pcf != 0;
1619 void Document::SetCaseFolder(CaseFolder *pcf_) {
1620 delete pcf;
1621 pcf = pcf_;
1624 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1625 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1626 if (UTF8IsAscii(leadByte)) {
1627 // Common case: ASCII character
1628 return CharacterExtracted(leadByte, 1);
1630 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1631 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1632 for (int b=1; b<widthCharBytes; b++)
1633 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1634 int utf8status = UTF8Classify(charBytes, widthCharBytes);
1635 if (utf8status & UTF8MaskInvalid) {
1636 // Treat as invalid and use up just one byte
1637 return CharacterExtracted(unicodeReplacementChar, 1);
1638 } else {
1639 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1644 * Find text in document, supporting both forward and backward
1645 * searches (just pass minPos > maxPos to do a backward search)
1646 * Has not been tested with backwards DBCS searches yet.
1648 long Document::FindText(int minPos, int maxPos, const char *search,
1649 int flags, int *length) {
1650 if (*length <= 0)
1651 return minPos;
1652 const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
1653 const bool word = (flags & SCFIND_WHOLEWORD) != 0;
1654 const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
1655 const bool regExp = (flags & SCFIND_REGEXP) != 0;
1656 if (regExp) {
1657 if (!regex)
1658 regex = CreateRegexSearch(&charClass);
1659 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1660 } else {
1662 const bool forward = minPos <= maxPos;
1663 const int increment = forward ? 1 : -1;
1665 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1666 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1667 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1669 // Compute actual search ranges needed
1670 const int lengthFind = *length;
1672 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1673 const int limitPos = Platform::Maximum(startPos, endPos);
1674 int pos = startPos;
1675 if (!forward) {
1676 // Back all of a character
1677 pos = NextPosition(pos, increment);
1679 if (caseSensitive) {
1680 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1681 const char charStartSearch = search[0];
1682 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1683 if (CharAt(pos) == charStartSearch) {
1684 bool found = (pos + lengthFind) <= limitPos;
1685 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1686 found = CharAt(pos + indexSearch) == search[indexSearch];
1688 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1689 return pos;
1692 if (!NextCharacter(pos, increment))
1693 break;
1695 } else if (SC_CP_UTF8 == dbcsCodePage) {
1696 const size_t maxFoldingExpansion = 4;
1697 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1698 const int lenSearch = static_cast<int>(
1699 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1700 char bytes[UTF8MaxBytes + 1];
1701 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1702 while (forward ? (pos < endPos) : (pos >= endPos)) {
1703 int widthFirstCharacter = 0;
1704 int posIndexDocument = pos;
1705 int indexSearch = 0;
1706 bool characterMatches = true;
1707 for (;;) {
1708 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1709 bytes[0] = leadByte;
1710 int widthChar = 1;
1711 if (!UTF8IsAscii(leadByte)) {
1712 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1713 for (int b=1; b<widthCharBytes; b++) {
1714 bytes[b] = cb.CharAt(posIndexDocument+b);
1716 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1718 if (!widthFirstCharacter)
1719 widthFirstCharacter = widthChar;
1720 if ((posIndexDocument + widthChar) > limitPos)
1721 break;
1722 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1723 folded[lenFlat] = 0;
1724 // Does folded match the buffer
1725 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1726 if (!characterMatches)
1727 break;
1728 posIndexDocument += widthChar;
1729 indexSearch += lenFlat;
1730 if (indexSearch >= lenSearch)
1731 break;
1733 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1734 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1735 *length = posIndexDocument - pos;
1736 return pos;
1739 if (forward) {
1740 pos += widthFirstCharacter;
1741 } else {
1742 if (!NextCharacter(pos, increment))
1743 break;
1746 } else if (dbcsCodePage) {
1747 const size_t maxBytesCharacter = 2;
1748 const size_t maxFoldingExpansion = 4;
1749 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1750 const int lenSearch = static_cast<int>(
1751 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1752 while (forward ? (pos < endPos) : (pos >= endPos)) {
1753 int indexDocument = 0;
1754 int indexSearch = 0;
1755 bool characterMatches = true;
1756 while (characterMatches &&
1757 ((pos + indexDocument) < limitPos) &&
1758 (indexSearch < lenSearch)) {
1759 char bytes[maxBytesCharacter + 1];
1760 bytes[0] = cb.CharAt(pos + indexDocument);
1761 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1762 if (widthChar == 2)
1763 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1764 if ((pos + indexDocument + widthChar) > limitPos)
1765 break;
1766 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1767 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1768 folded[lenFlat] = 0;
1769 // Does folded match the buffer
1770 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1771 indexDocument += widthChar;
1772 indexSearch += lenFlat;
1774 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1775 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1776 *length = indexDocument;
1777 return pos;
1780 if (!NextCharacter(pos, increment))
1781 break;
1783 } else {
1784 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1785 std::vector<char> searchThing(lengthFind + 1);
1786 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1787 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1788 bool found = (pos + lengthFind) <= limitPos;
1789 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1790 char ch = CharAt(pos + indexSearch);
1791 char folded[2];
1792 pcf->Fold(folded, sizeof(folded), &ch, 1);
1793 found = folded[0] == searchThing[indexSearch];
1795 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1796 return pos;
1798 if (!NextCharacter(pos, increment))
1799 break;
1803 //Platform::DebugPrintf("Not found\n");
1804 return -1;
1807 const char *Document::SubstituteByPosition(const char *text, int *length) {
1808 if (regex)
1809 return regex->SubstituteByPosition(this, text, length);
1810 else
1811 return 0;
1814 int Document::LinesTotal() const {
1815 return cb.Lines();
1818 void Document::SetDefaultCharClasses(bool includeWordClass) {
1819 charClass.SetDefaultCharClasses(includeWordClass);
1822 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1823 charClass.SetCharClasses(chars, newCharClass);
1826 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1827 return charClass.GetCharsOfClass(characterClass, buffer);
1830 void SCI_METHOD Document::StartStyling(Sci_Position position, char) {
1831 endStyled = position;
1834 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
1835 if (enteredStyling != 0) {
1836 return false;
1837 } else {
1838 enteredStyling++;
1839 int prevEndStyled = endStyled;
1840 if (cb.SetStyleFor(endStyled, length, style)) {
1841 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1842 prevEndStyled, length);
1843 NotifyModified(mh);
1845 endStyled += length;
1846 enteredStyling--;
1847 return true;
1851 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
1852 if (enteredStyling != 0) {
1853 return false;
1854 } else {
1855 enteredStyling++;
1856 bool didChange = false;
1857 int startMod = 0;
1858 int endMod = 0;
1859 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1860 PLATFORM_ASSERT(endStyled < Length());
1861 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1862 if (!didChange) {
1863 startMod = endStyled;
1865 didChange = true;
1866 endMod = endStyled;
1869 if (didChange) {
1870 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1871 startMod, endMod - startMod + 1);
1872 NotifyModified(mh);
1874 enteredStyling--;
1875 return true;
1879 void Document::EnsureStyledTo(int pos) {
1880 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1881 IncrementStyleClock();
1882 if (pli && !pli->UseContainerLexing()) {
1883 int lineEndStyled = LineFromPosition(GetEndStyled());
1884 int endStyledTo = LineStart(lineEndStyled);
1885 pli->Colourise(endStyledTo, pos);
1886 } else {
1887 // Ask the watchers to style, and stop as soon as one responds.
1888 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1889 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1890 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1896 void Document::StyleToAdjustingLineDuration(int pos) {
1897 // Place bounds on the duration used to avoid glitches spiking it
1898 // and so causing slow styling or non-responsive scrolling
1899 const double minDurationOneLine = 0.000001;
1900 const double maxDurationOneLine = 0.0001;
1902 // Alpha value for exponential smoothing.
1903 // Most recent value contributes 25% to smoothed value.
1904 const double alpha = 0.25;
1906 const Sci_Position lineFirst = LineFromPosition(GetEndStyled());
1907 ElapsedTime etStyling;
1908 EnsureStyledTo(pos);
1909 const double durationStyling = etStyling.Duration();
1910 const Sci_Position lineLast = LineFromPosition(GetEndStyled());
1911 if (lineLast >= lineFirst + 8) {
1912 // Only adjust for styling multiple lines to avoid instability
1913 const double durationOneLine = durationStyling / (lineLast - lineFirst);
1914 durationStyleOneLine = alpha * durationOneLine + (1.0 - alpha) * durationStyleOneLine;
1915 if (durationStyleOneLine < minDurationOneLine) {
1916 durationStyleOneLine = minDurationOneLine;
1917 } else if (durationStyleOneLine > maxDurationOneLine) {
1918 durationStyleOneLine = maxDurationOneLine;
1923 void Document::LexerChanged() {
1924 // Tell the watchers the lexer has changed.
1925 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1926 it->watcher->NotifyLexerChanged(this, it->userData);
1930 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
1931 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1932 if (state != statePrevious) {
1933 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1934 NotifyModified(mh);
1936 return statePrevious;
1939 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
1940 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1943 int Document::GetMaxLineState() {
1944 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1947 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
1948 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1949 NotifyModified(mh);
1952 StyledText Document::MarginStyledText(int line) const {
1953 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1954 return StyledText(pla->Length(line), pla->Text(line),
1955 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1958 void Document::MarginSetText(int line, const char *text) {
1959 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1960 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1961 NotifyModified(mh);
1964 void Document::MarginSetStyle(int line, int style) {
1965 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1966 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1969 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1970 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1971 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1974 void Document::MarginClearAll() {
1975 int maxEditorLine = LinesTotal();
1976 for (int l=0; l<maxEditorLine; l++)
1977 MarginSetText(l, 0);
1978 // Free remaining data
1979 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1982 StyledText Document::AnnotationStyledText(int line) const {
1983 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1984 return StyledText(pla->Length(line), pla->Text(line),
1985 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1988 void Document::AnnotationSetText(int line, const char *text) {
1989 if (line >= 0 && line < LinesTotal()) {
1990 const int linesBefore = AnnotationLines(line);
1991 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1992 const int linesAfter = AnnotationLines(line);
1993 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1994 mh.annotationLinesAdded = linesAfter - linesBefore;
1995 NotifyModified(mh);
1999 void Document::AnnotationSetStyle(int line, int style) {
2000 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
2001 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
2002 NotifyModified(mh);
2005 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
2006 if (line >= 0 && line < LinesTotal()) {
2007 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
2011 int Document::AnnotationLines(int line) const {
2012 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
2015 void Document::AnnotationClearAll() {
2016 int maxEditorLine = LinesTotal();
2017 for (int l=0; l<maxEditorLine; l++)
2018 AnnotationSetText(l, 0);
2019 // Free remaining data
2020 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
2023 void Document::IncrementStyleClock() {
2024 styleClock = (styleClock + 1) % 0x100000;
2027 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2028 if (decorations.FillRange(position, value, fillLength)) {
2029 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2030 position, fillLength);
2031 NotifyModified(mh);
2035 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2036 WatcherWithUserData wwud(watcher, userData);
2037 std::vector<WatcherWithUserData>::iterator it =
2038 std::find(watchers.begin(), watchers.end(), wwud);
2039 if (it != watchers.end())
2040 return false;
2041 watchers.push_back(wwud);
2042 return true;
2045 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2046 std::vector<WatcherWithUserData>::iterator it =
2047 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2048 if (it != watchers.end()) {
2049 watchers.erase(it);
2050 return true;
2052 return false;
2055 void Document::NotifyModifyAttempt() {
2056 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2057 it->watcher->NotifyModifyAttempt(this, it->userData);
2061 void Document::NotifySavePoint(bool atSavePoint) {
2062 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2063 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2067 void Document::NotifyModified(DocModification mh) {
2068 if (mh.modificationType & SC_MOD_INSERTTEXT) {
2069 decorations.InsertSpace(mh.position, mh.length);
2070 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2071 decorations.DeleteRange(mh.position, mh.length);
2073 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2074 it->watcher->NotifyModified(this, mh, it->userData);
2078 bool Document::IsWordPartSeparator(char ch) const {
2079 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2082 int Document::WordPartLeft(int pos) {
2083 if (pos > 0) {
2084 --pos;
2085 char startChar = cb.CharAt(pos);
2086 if (IsWordPartSeparator(startChar)) {
2087 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2088 --pos;
2091 if (pos > 0) {
2092 startChar = cb.CharAt(pos);
2093 --pos;
2094 if (IsLowerCase(startChar)) {
2095 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2096 --pos;
2097 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2098 ++pos;
2099 } else if (IsUpperCase(startChar)) {
2100 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2101 --pos;
2102 if (!IsUpperCase(cb.CharAt(pos)))
2103 ++pos;
2104 } else if (IsADigit(startChar)) {
2105 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2106 --pos;
2107 if (!IsADigit(cb.CharAt(pos)))
2108 ++pos;
2109 } else if (IsPunctuation(startChar)) {
2110 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2111 --pos;
2112 if (!IsPunctuation(cb.CharAt(pos)))
2113 ++pos;
2114 } else if (isspacechar(startChar)) {
2115 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2116 --pos;
2117 if (!isspacechar(cb.CharAt(pos)))
2118 ++pos;
2119 } else if (!IsASCII(startChar)) {
2120 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2121 --pos;
2122 if (IsASCII(cb.CharAt(pos)))
2123 ++pos;
2124 } else {
2125 ++pos;
2129 return pos;
2132 int Document::WordPartRight(int pos) {
2133 char startChar = cb.CharAt(pos);
2134 int length = Length();
2135 if (IsWordPartSeparator(startChar)) {
2136 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2137 ++pos;
2138 startChar = cb.CharAt(pos);
2140 if (!IsASCII(startChar)) {
2141 while (pos < length && !IsASCII(cb.CharAt(pos)))
2142 ++pos;
2143 } else if (IsLowerCase(startChar)) {
2144 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2145 ++pos;
2146 } else if (IsUpperCase(startChar)) {
2147 if (IsLowerCase(cb.CharAt(pos + 1))) {
2148 ++pos;
2149 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2150 ++pos;
2151 } else {
2152 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2153 ++pos;
2155 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2156 --pos;
2157 } else if (IsADigit(startChar)) {
2158 while (pos < length && IsADigit(cb.CharAt(pos)))
2159 ++pos;
2160 } else if (IsPunctuation(startChar)) {
2161 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2162 ++pos;
2163 } else if (isspacechar(startChar)) {
2164 while (pos < length && isspacechar(cb.CharAt(pos)))
2165 ++pos;
2166 } else {
2167 ++pos;
2169 return pos;
// True for the two characters that can form a line end: LF and CR.
static bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2176 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2177 int sStart = cb.StyleAt(pos);
2178 if (delta < 0) {
2179 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2180 pos--;
2181 pos++;
2182 } else {
2183 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2184 pos++;
2186 return pos;
// Return the partner of a brace character: () [] {} <> map to each other.
// Any other character, including NUL, yields '\0'.
static char BraceOpposite(char ch) {
	// Partners are stored next to each other so flipping the low bit of the
	// index moves between an opening brace and its closing brace.
	static const char bracePairs[] = "()[]{}<>";
	for (int i = 0; bracePairs[i] != '\0'; i++) {
		if (bracePairs[i] == ch)
			return bracePairs[i ^ 1];
	}
	return '\0';
}
2212 // TODO: should be able to extend styled region to find matching brace
2213 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2214 char chBrace = CharAt(position);
2215 char chSeek = BraceOpposite(chBrace);
2216 if (chSeek == '\0')
2217 return - 1;
2218 const int styBrace = StyleIndexAt(position);
2219 int direction = -1;
2220 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2221 direction = 1;
2222 int depth = 1;
2223 position = NextPosition(position, direction);
2224 while ((position >= 0) && (position < Length())) {
2225 char chAtPos = CharAt(position);
2226 const int styAtPos = StyleIndexAt(position);
2227 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2228 if (chAtPos == chBrace)
2229 depth++;
2230 if (chAtPos == chSeek)
2231 depth--;
2232 if (depth == 0)
2233 return position;
2235 int positionBeforeMove = position;
2236 position = NextPosition(position, direction);
2237 if (position == positionBeforeMove)
2238 break;
2240 return - 1;
2244 * Implementation of RegexSearchBase for the default built-in regular expression engine
2246 class BuiltinRegex : public RegexSearchBase {
2247 public:
2248 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2250 virtual ~BuiltinRegex() {
2253 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2254 bool caseSensitive, bool word, bool wordStart, int flags,
2255 int *length);
2257 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2259 private:
2260 RESearch search;
2261 std::string substituted;
2264 namespace {
2267 * RESearchRange keeps track of search range.
2269 class RESearchRange {
2270 public:
2271 const Document *doc;
2272 int increment;
2273 int startPos;
2274 int endPos;
2275 int lineRangeStart;
2276 int lineRangeEnd;
2277 int lineRangeBreak;
2278 RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2279 increment = (minPos <= maxPos) ? 1 : -1;
2281 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2282 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2283 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2285 lineRangeStart = doc->LineFromPosition(startPos);
2286 lineRangeEnd = doc->LineFromPosition(endPos);
2287 if ((increment == 1) &&
2288 (startPos >= doc->LineEnd(lineRangeStart)) &&
2289 (lineRangeStart < lineRangeEnd)) {
2290 // the start position is at end of line or between line end characters.
2291 lineRangeStart++;
2292 startPos = doc->LineStart(lineRangeStart);
2293 } else if ((increment == -1) &&
2294 (startPos <= doc->LineStart(lineRangeStart)) &&
2295 (lineRangeStart > lineRangeEnd)) {
2296 // the start position is at beginning of line.
2297 lineRangeStart--;
2298 startPos = doc->LineEnd(lineRangeStart);
2300 lineRangeBreak = lineRangeEnd + increment;
2302 Range LineRange(int line) const {
2303 Range range(doc->LineStart(line), doc->LineEnd(line));
2304 if (increment == 1) {
2305 if (line == lineRangeStart)
2306 range.start = startPos;
2307 if (line == lineRangeEnd)
2308 range.end = endPos;
2309 } else {
2310 if (line == lineRangeEnd)
2311 range.start = endPos;
2312 if (line == lineRangeStart)
2313 range.end = startPos;
2315 return range;
2319 // Define a way for the Regular Expression code to access the document
2320 class DocumentIndexer : public CharacterIndexer {
2321 Document *pdoc;
2322 int end;
2323 public:
2324 DocumentIndexer(Document *pdoc_, int end_) :
2325 pdoc(pdoc_), end(end_) {
2328 virtual ~DocumentIndexer() {
2331 virtual char CharAt(int index) {
2332 if (index < 0 || index >= end)
2333 return 0;
2334 else
2335 return pdoc->CharAt(index);
2339 #ifdef CXX11_REGEX
2341 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2342 public:
2343 const Document *doc;
2344 Position position;
2345 ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2347 ByteIterator(const ByteIterator &other) {
2348 doc = other.doc;
2349 position = other.position;
2351 ByteIterator &operator=(const ByteIterator &other) {
2352 if (this != &other) {
2353 doc = other.doc;
2354 position = other.position;
2356 return *this;
2358 char operator*() const {
2359 return doc->CharAt(position);
2361 ByteIterator &operator++() {
2362 position++;
2363 return *this;
2365 ByteIterator operator++(int) {
2366 ByteIterator retVal(*this);
2367 position++;
2368 return retVal;
2370 ByteIterator &operator--() {
2371 position--;
2372 return *this;
2374 bool operator==(const ByteIterator &other) const {
2375 return doc == other.doc && position == other.position;
2377 bool operator!=(const ByteIterator &other) const {
2378 return doc != other.doc || position != other.position;
2380 int Pos() const {
2381 return position;
2383 int PosRoundUp() const {
2384 return position;
2388 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2389 // Would be better to use sizeof(wchar_t) or similar to differentiate
2390 // but easier for now to hard-code platforms.
2391 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2392 // appear to allow specializing basic_regex over these.
2394 #ifdef _WIN32
2395 #define WCHAR_T_IS_16 1
2396 #else
2397 #define WCHAR_T_IS_16 0
2398 #endif
2400 #if WCHAR_T_IS_16
2402 // On Windows, report non-BMP characters as 2 separate surrogates as that
2403 // matches wregex since it is based on wchar_t.
2404 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2405 // These 3 fields determine the iterator position and are used for comparisons
2406 const Document *doc;
2407 Position position;
2408 size_t characterIndex;
2409 // Remaining fields are derived from the determining fields so are excluded in comparisons
2410 unsigned int lenBytes;
2411 size_t lenCharacters;
2412 wchar_t buffered[2];
2413 public:
2414 UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2415 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2416 buffered[0] = 0;
2417 buffered[1] = 0;
2418 if (doc) {
2419 ReadCharacter();
2422 UTF8Iterator(const UTF8Iterator &other) {
2423 doc = other.doc;
2424 position = other.position;
2425 characterIndex = other.characterIndex;
2426 lenBytes = other.lenBytes;
2427 lenCharacters = other.lenCharacters;
2428 buffered[0] = other.buffered[0];
2429 buffered[1] = other.buffered[1];
2431 UTF8Iterator &operator=(const UTF8Iterator &other) {
2432 if (this != &other) {
2433 doc = other.doc;
2434 position = other.position;
2435 characterIndex = other.characterIndex;
2436 lenBytes = other.lenBytes;
2437 lenCharacters = other.lenCharacters;
2438 buffered[0] = other.buffered[0];
2439 buffered[1] = other.buffered[1];
2441 return *this;
2443 wchar_t operator*() const {
2444 assert(lenCharacters != 0);
2445 return buffered[characterIndex];
2447 UTF8Iterator &operator++() {
2448 if ((characterIndex + 1) < (lenCharacters)) {
2449 characterIndex++;
2450 } else {
2451 position += lenBytes;
2452 ReadCharacter();
2453 characterIndex = 0;
2455 return *this;
2457 UTF8Iterator operator++(int) {
2458 UTF8Iterator retVal(*this);
2459 if ((characterIndex + 1) < (lenCharacters)) {
2460 characterIndex++;
2461 } else {
2462 position += lenBytes;
2463 ReadCharacter();
2464 characterIndex = 0;
2466 return retVal;
2468 UTF8Iterator &operator--() {
2469 if (characterIndex) {
2470 characterIndex--;
2471 } else {
2472 position = doc->NextPosition(position, -1);
2473 ReadCharacter();
2474 characterIndex = lenCharacters - 1;
2476 return *this;
2478 bool operator==(const UTF8Iterator &other) const {
2479 // Only test the determining fields, not the character widths and values derived from this
2480 return doc == other.doc &&
2481 position == other.position &&
2482 characterIndex == other.characterIndex;
2484 bool operator!=(const UTF8Iterator &other) const {
2485 // Only test the determining fields, not the character widths and values derived from this
2486 return doc != other.doc ||
2487 position != other.position ||
2488 characterIndex != other.characterIndex;
2490 int Pos() const {
2491 return position;
2493 int PosRoundUp() const {
2494 if (characterIndex)
2495 return position + lenBytes; // Force to end of character
2496 else
2497 return position;
2499 private:
2500 void ReadCharacter() {
2501 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2502 lenBytes = charExtracted.widthBytes;
2503 if (charExtracted.character == unicodeReplacementChar) {
2504 lenCharacters = 1;
2505 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2506 } else {
2507 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2512 #else
2514 // On Unix, report non-BMP characters as single characters
2516 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2517 const Document *doc;
2518 Position position;
2519 public:
2520 UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2522 UTF8Iterator(const UTF8Iterator &other) {
2523 doc = other.doc;
2524 position = other.position;
2526 UTF8Iterator &operator=(const UTF8Iterator &other) {
2527 if (this != &other) {
2528 doc = other.doc;
2529 position = other.position;
2531 return *this;
2533 wchar_t operator*() const {
2534 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2535 return charExtracted.character;
2537 UTF8Iterator &operator++() {
2538 position = doc->NextPosition(position, 1);
2539 return *this;
2541 UTF8Iterator operator++(int) {
2542 UTF8Iterator retVal(*this);
2543 position = doc->NextPosition(position, 1);
2544 return retVal;
2546 UTF8Iterator &operator--() {
2547 position = doc->NextPosition(position, -1);
2548 return *this;
2550 bool operator==(const UTF8Iterator &other) const {
2551 return doc == other.doc && position == other.position;
2553 bool operator!=(const UTF8Iterator &other) const {
2554 return doc != other.doc || position != other.position;
2556 int Pos() const {
2557 return position;
2559 int PosRoundUp() const {
2560 return position;
2564 #endif
2566 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2567 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2568 if (!doc->IsLineStartPosition(startPos))
2569 flagsMatch |= std::regex_constants::match_not_bol;
2570 if (!doc->IsLineEndPosition(endPos))
2571 flagsMatch |= std::regex_constants::match_not_eol;
2572 return flagsMatch;
2575 template<typename Iterator, typename Regex>
2576 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2577 bool matched = false;
2578 std::match_results<Iterator> match;
2580 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2581 // If they didn't then the line by line iteration could be removed for the forwards
2582 // case and replaced with the following 4 lines:
2583 // Iterator uiStart(doc, startPos);
2584 // Iterator uiEnd(doc, endPos);
2585 // flagsMatch = MatchFlags(doc, startPos, endPos);
2586 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2588 // Line by line.
2589 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2590 const Range lineRange = resr.LineRange(line);
2591 Iterator itStart(doc, lineRange.start);
2592 Iterator itEnd(doc, lineRange.end);
2593 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2594 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2595 // Check for the last match on this line.
2596 if (matched) {
2597 if (resr.increment == -1) {
2598 while (matched) {
2599 Iterator itNext(doc, match[0].second.PosRoundUp());
2600 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2601 std::match_results<Iterator> matchNext;
2602 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2603 if (matched) {
2604 if (match[0].first == match[0].second) {
2605 // Empty match means failure so exit
2606 return false;
2608 match = matchNext;
2611 matched = true;
2613 break;
2616 if (matched) {
2617 for (size_t co = 0; co < match.size(); co++) {
2618 search.bopat[co] = match[co].first.Pos();
2619 search.eopat[co] = match[co].second.PosRoundUp();
2620 size_t lenMatch = search.eopat[co] - search.bopat[co];
2621 search.pat[co].resize(lenMatch);
2622 for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2623 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2627 return matched;
2630 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2631 bool caseSensitive, int *length, RESearch &search) {
2632 const RESearchRange resr(doc, minPos, maxPos);
2633 try {
2634 //ElapsedTime et;
2635 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2636 // Flags that apper to have no effect:
2637 // | std::regex::collate | std::regex::extended;
2638 if (!caseSensitive)
2639 flagsRe = flagsRe | std::regex::icase;
2641 // Clear the RESearch so can fill in matches
2642 search.Clear();
2644 bool matched = false;
2645 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2646 unsigned int lenS = static_cast<unsigned int>(strlen(s));
2647 std::vector<wchar_t> ws(lenS + 1);
2648 #if WCHAR_T_IS_16
2649 size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2650 #else
2651 size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2652 #endif
2653 ws[outLen] = 0;
2654 std::wregex regexp;
2655 #if defined(__APPLE__)
2656 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2657 // is one byte not one character.
2658 // However, on OS X this makes wregex act as Unicode
2659 std::locale localeU("en_US.UTF-8");
2660 regexp.imbue(localeU);
2661 #endif
2662 regexp.assign(&ws[0], flagsRe);
2663 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2665 } else {
2666 std::regex regexp;
2667 regexp.assign(s, flagsRe);
2668 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2671 int posMatch = -1;
2672 if (matched) {
2673 posMatch = search.bopat[0];
2674 *length = search.eopat[0] - search.bopat[0];
2676 // Example - search in doc/ScintillaHistory.html for
2677 // [[:upper:]]eta[[:space:]]
2678 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2679 //double durSearch = et.Duration(true);
2680 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2681 return posMatch;
2682 } catch (std::regex_error &) {
2683 // Failed to create regular expression
2684 throw RegexError();
2685 } catch (...) {
2686 // Failed in some other way
2687 return -1;
2691 #endif
2695 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2696 bool caseSensitive, bool, bool, int flags,
2697 int *length) {
2699 #ifdef CXX11_REGEX
2700 if (flags & SCFIND_CXX11REGEX) {
2701 return Cxx11RegexFindText(doc, minPos, maxPos, s,
2702 caseSensitive, length, search);
2704 #endif
2706 const RESearchRange resr(doc, minPos, maxPos);
2708 const bool posix = (flags & SCFIND_POSIX) != 0;
2710 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2711 if (errmsg) {
2712 return -1;
2714 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2715 // Replace first '.' with '-' in each property file variable reference:
2716 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2717 // Replace: $(\1-\2)
2718 int pos = -1;
2719 int lenRet = 0;
2720 const char searchEnd = s[*length - 1];
2721 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2722 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2723 int startOfLine = doc->LineStart(line);
2724 int endOfLine = doc->LineEnd(line);
2725 if (resr.increment == 1) {
2726 if (line == resr.lineRangeStart) {
2727 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2728 continue; // Can't match start of line if start position after start of line
2729 startOfLine = resr.startPos;
2731 if (line == resr.lineRangeEnd) {
2732 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2733 continue; // Can't match end of line if end position before end of line
2734 endOfLine = resr.endPos;
2736 } else {
2737 if (line == resr.lineRangeEnd) {
2738 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2739 continue; // Can't match start of line if end position after start of line
2740 startOfLine = resr.endPos;
2742 if (line == resr.lineRangeStart) {
2743 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2744 continue; // Can't match end of line if start position before end of line
2745 endOfLine = resr.startPos;
2749 DocumentIndexer di(doc, endOfLine);
2750 int success = search.Execute(di, startOfLine, endOfLine);
2751 if (success) {
2752 pos = search.bopat[0];
2753 // Ensure only whole characters selected
2754 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2755 lenRet = search.eopat[0] - search.bopat[0];
2756 // There can be only one start of a line, so no need to look for last match in line
2757 if ((resr.increment == -1) && (s[0] != '^')) {
2758 // Check for the last match on this line.
2759 int repetitions = 1000; // Break out of infinite loop
2760 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2761 success = search.Execute(di, pos+1, endOfLine);
2762 if (success) {
2763 if (search.eopat[0] <= minPos) {
2764 pos = search.bopat[0];
2765 lenRet = search.eopat[0] - search.bopat[0];
2766 } else {
2767 success = 0;
2772 break;
2775 *length = lenRet;
2776 return pos;
2779 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2780 substituted.clear();
2781 DocumentIndexer di(doc, doc->Length());
2782 search.GrabMatches(di);
2783 for (int j = 0; j < *length; j++) {
2784 if (text[j] == '\\') {
2785 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2786 unsigned int patNum = text[j + 1] - '0';
2787 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2788 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2789 substituted.append(search.pat[patNum].c_str(), len);
2790 j++;
2791 } else {
2792 j++;
2793 switch (text[j]) {
2794 case 'a':
2795 substituted.push_back('\a');
2796 break;
2797 case 'b':
2798 substituted.push_back('\b');
2799 break;
2800 case 'f':
2801 substituted.push_back('\f');
2802 break;
2803 case 'n':
2804 substituted.push_back('\n');
2805 break;
2806 case 'r':
2807 substituted.push_back('\r');
2808 break;
2809 case 't':
2810 substituted.push_back('\t');
2811 break;
2812 case 'v':
2813 substituted.push_back('\v');
2814 break;
2815 case '\\':
2816 substituted.push_back('\\');
2817 break;
2818 default:
2819 substituted.push_back('\\');
2820 j--;
2823 } else {
2824 substituted.push_back(text[j]);
2827 *length = static_cast<int>(substituted.length());
2828 return substituted.c_str();
2831 #ifndef SCI_OWNREGEX
2833 #ifdef SCI_NAMESPACE
2835 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2836 return new BuiltinRegex(charClassTable);
2839 #else
2841 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2842 return new BuiltinRegex(charClassTable);
2845 #endif
2847 #endif