Update Scintilla to version 3.5.3
[TortoiseGit.git] / ext / scintilla / src / Document.cxx
blob177065747343f7cfbb87322dbf80242408b08bbb
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
14 #include <stdexcept>
15 #include <string>
16 #include <vector>
17 #include <algorithm>
19 #ifdef CXX11_REGEX
20 #include <regex>
21 #endif
23 #include "Platform.h"
25 #include "ILexer.h"
26 #include "Scintilla.h"
28 #include "CharacterSet.h"
29 #include "SplitVector.h"
30 #include "Partitioning.h"
31 #include "RunStyles.h"
32 #include "CellBuffer.h"
33 #include "PerLine.h"
34 #include "CharClassify.h"
35 #include "Decoration.h"
36 #include "CaseFolder.h"
37 #include "Document.h"
38 #include "RESearch.h"
39 #include "UniConversion.h"
40 #include "UnicodeFromUTF8.h"
42 #ifdef SCI_NAMESPACE
43 using namespace Scintilla;
44 #endif
46 static inline bool IsPunctuation(char ch) {
47 return IsASCII(ch) && ispunct(ch);
50 void LexInterface::Colourise(int start, int end) {
51 if (pdoc && instance && !performingStyle) {
52 // Protect against reentrance, which may occur, for example, when
53 // fold points are discovered while performing styling and the folding
54 // code looks for child lines which may trigger styling.
55 performingStyle = true;
57 int lengthDoc = pdoc->Length();
58 if (end == -1)
59 end = lengthDoc;
60 int len = end - start;
62 PLATFORM_ASSERT(len >= 0);
63 PLATFORM_ASSERT(start + len <= lengthDoc);
65 int styleStart = 0;
66 if (start > 0)
67 styleStart = pdoc->StyleAt(start - 1);
69 if (len > 0) {
70 instance->Lex(start, len, styleStart, pdoc);
71 instance->Fold(start, len, styleStart, pdoc);
74 performingStyle = false;
78 int LexInterface::LineEndTypesSupported() {
79 if (instance) {
80 int interfaceVersion = instance->Version();
81 if (interfaceVersion >= lvSubStyles) {
82 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
83 return ssinstance->LineEndTypesSupported();
86 return 0;
89 Document::Document() {
90 refCount = 0;
91 pcf = NULL;
92 #ifdef _WIN32
93 eolMode = SC_EOL_CRLF;
94 #else
95 eolMode = SC_EOL_LF;
96 #endif
97 dbcsCodePage = 0;
98 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
99 endStyled = 0;
100 styleClock = 0;
101 enteredModification = 0;
102 enteredStyling = 0;
103 enteredReadOnlyCount = 0;
104 insertionSet = false;
105 tabInChars = 8;
106 indentInChars = 0;
107 actualIndentInChars = 8;
108 useTabs = true;
109 tabIndents = true;
110 backspaceUnindents = false;
112 matchesValid = false;
113 regex = 0;
115 UTF8BytesOfLeadInitialise();
117 perLineData[ldMarkers] = new LineMarkers();
118 perLineData[ldLevels] = new LineLevels();
119 perLineData[ldState] = new LineState();
120 perLineData[ldMargin] = new LineAnnotation();
121 perLineData[ldAnnotation] = new LineAnnotation();
123 cb.SetPerLine(this);
125 pli = 0;
128 Document::~Document() {
129 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
130 it->watcher->NotifyDeleted(this, it->userData);
132 for (int j=0; j<ldSize; j++) {
133 delete perLineData[j];
134 perLineData[j] = 0;
136 delete regex;
137 regex = 0;
138 delete pli;
139 pli = 0;
140 delete pcf;
141 pcf = 0;
144 void Document::Init() {
145 for (int j=0; j<ldSize; j++) {
146 if (perLineData[j])
147 perLineData[j]->Init();
151 int Document::LineEndTypesSupported() const {
152 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
153 return pli->LineEndTypesSupported();
154 else
155 return 0;
158 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
159 if (dbcsCodePage != dbcsCodePage_) {
160 dbcsCodePage = dbcsCodePage_;
161 SetCaseFolder(NULL);
162 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
163 return true;
164 } else {
165 return false;
169 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
170 if (lineEndBitSet != lineEndBitSet_) {
171 lineEndBitSet = lineEndBitSet_;
172 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
173 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
174 ModifiedAt(0);
175 cb.SetLineEndTypes(lineEndBitSetActive);
176 return true;
177 } else {
178 return false;
180 } else {
181 return false;
185 void Document::InsertLine(int line) {
186 for (int j=0; j<ldSize; j++) {
187 if (perLineData[j])
188 perLineData[j]->InsertLine(line);
192 void Document::RemoveLine(int line) {
193 for (int j=0; j<ldSize; j++) {
194 if (perLineData[j])
195 perLineData[j]->RemoveLine(line);
199 // Increase reference count and return its previous value.
200 int Document::AddRef() {
201 return refCount++;
204 // Decrease reference count and return its previous value.
205 // Delete the document if reference count reaches zero.
206 int SCI_METHOD Document::Release() {
207 int curRefCount = --refCount;
208 if (curRefCount == 0)
209 delete this;
210 return curRefCount;
213 void Document::SetSavePoint() {
214 cb.SetSavePoint();
215 NotifySavePoint(true);
218 void Document::TentativeUndo() {
219 CheckReadOnly();
220 if (enteredModification == 0) {
221 enteredModification++;
222 if (!cb.IsReadOnly()) {
223 bool startSavePoint = cb.IsSavePoint();
224 bool multiLine = false;
225 int steps = cb.TentativeSteps();
226 //Platform::DebugPrintf("Steps=%d\n", steps);
227 for (int step = 0; step < steps; step++) {
228 const int prevLinesTotal = LinesTotal();
229 const Action &action = cb.GetUndoStep();
230 if (action.at == removeAction) {
231 NotifyModified(DocModification(
232 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
233 } else if (action.at == containerAction) {
234 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
235 dm.token = action.position;
236 NotifyModified(dm);
237 } else {
238 NotifyModified(DocModification(
239 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
241 cb.PerformUndoStep();
242 if (action.at != containerAction) {
243 ModifiedAt(action.position);
246 int modFlags = SC_PERFORMED_UNDO;
247 // With undo, an insertion action becomes a deletion notification
248 if (action.at == removeAction) {
249 modFlags |= SC_MOD_INSERTTEXT;
250 } else if (action.at == insertAction) {
251 modFlags |= SC_MOD_DELETETEXT;
253 if (steps > 1)
254 modFlags |= SC_MULTISTEPUNDOREDO;
255 const int linesAdded = LinesTotal() - prevLinesTotal;
256 if (linesAdded != 0)
257 multiLine = true;
258 if (step == steps - 1) {
259 modFlags |= SC_LASTSTEPINUNDOREDO;
260 if (multiLine)
261 modFlags |= SC_MULTILINEUNDOREDO;
263 NotifyModified(DocModification(modFlags, action.position, action.lenData,
264 linesAdded, action.data));
267 bool endSavePoint = cb.IsSavePoint();
268 if (startSavePoint != endSavePoint)
269 NotifySavePoint(endSavePoint);
271 cb.TentativeCommit();
273 enteredModification--;
277 int Document::GetMark(int line) {
278 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
281 int Document::MarkerNext(int lineStart, int mask) const {
282 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
285 int Document::AddMark(int line, int markerNum) {
286 if (line >= 0 && line <= LinesTotal()) {
287 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
288 AddMark(line, markerNum, LinesTotal());
289 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
290 NotifyModified(mh);
291 return prev;
292 } else {
293 return 0;
297 void Document::AddMarkSet(int line, int valueSet) {
298 if (line < 0 || line > LinesTotal()) {
299 return;
301 unsigned int m = valueSet;
302 for (int i = 0; m; i++, m >>= 1)
303 if (m & 1)
304 static_cast<LineMarkers *>(perLineData[ldMarkers])->
305 AddMark(line, i, LinesTotal());
306 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
307 NotifyModified(mh);
310 void Document::DeleteMark(int line, int markerNum) {
311 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
312 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
313 NotifyModified(mh);
316 void Document::DeleteMarkFromHandle(int markerHandle) {
317 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
318 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
319 mh.line = -1;
320 NotifyModified(mh);
323 void Document::DeleteAllMarks(int markerNum) {
324 bool someChanges = false;
325 for (int line = 0; line < LinesTotal(); line++) {
326 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
327 someChanges = true;
329 if (someChanges) {
330 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
331 mh.line = -1;
332 NotifyModified(mh);
336 int Document::LineFromHandle(int markerHandle) {
337 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
340 int SCI_METHOD Document::LineStart(int line) const {
341 return cb.LineStart(line);
344 bool Document::IsLineStartPosition(int position) const {
345 return LineStart(LineFromPosition(position)) == position;
348 int SCI_METHOD Document::LineEnd(int line) const {
349 if (line >= LinesTotal() - 1) {
350 return LineStart(line + 1);
351 } else {
352 int position = LineStart(line + 1);
353 if (SC_CP_UTF8 == dbcsCodePage) {
354 unsigned char bytes[] = {
355 static_cast<unsigned char>(cb.CharAt(position-3)),
356 static_cast<unsigned char>(cb.CharAt(position-2)),
357 static_cast<unsigned char>(cb.CharAt(position-1)),
359 if (UTF8IsSeparator(bytes)) {
360 return position - UTF8SeparatorLength;
362 if (UTF8IsNEL(bytes+1)) {
363 return position - UTF8NELLength;
366 position--; // Back over CR or LF
367 // When line terminator is CR+LF, may need to go back one more
368 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
369 position--;
371 return position;
375 void SCI_METHOD Document::SetErrorStatus(int status) {
376 // Tell the watchers an error has occurred.
377 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
378 it->watcher->NotifyErrorOccurred(this, it->userData, status);
382 int SCI_METHOD Document::LineFromPosition(int pos) const {
383 return cb.LineFromPosition(pos);
386 int Document::LineEndPosition(int position) const {
387 return LineEnd(LineFromPosition(position));
390 bool Document::IsLineEndPosition(int position) const {
391 return LineEnd(LineFromPosition(position)) == position;
394 bool Document::IsPositionInLineEnd(int position) const {
395 return position >= LineEnd(LineFromPosition(position));
398 int Document::VCHomePosition(int position) const {
399 int line = LineFromPosition(position);
400 int startPosition = LineStart(line);
401 int endLine = LineEnd(line);
402 int startText = startPosition;
403 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
404 startText++;
405 if (position == startText)
406 return startPosition;
407 else
408 return startText;
411 int SCI_METHOD Document::SetLevel(int line, int level) {
412 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
413 if (prev != level) {
414 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
415 LineStart(line), 0, 0, 0, line);
416 mh.foldLevelNow = level;
417 mh.foldLevelPrev = prev;
418 NotifyModified(mh);
420 return prev;
423 int SCI_METHOD Document::GetLevel(int line) const {
424 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
427 void Document::ClearLevels() {
428 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
431 static bool IsSubordinate(int levelStart, int levelTry) {
432 if (levelTry & SC_FOLDLEVELWHITEFLAG)
433 return true;
434 else
435 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
438 int Document::GetLastChild(int lineParent, int level, int lastLine) {
439 if (level == -1)
440 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
441 int maxLine = LinesTotal();
442 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
443 int lineMaxSubord = lineParent;
444 while (lineMaxSubord < maxLine - 1) {
445 EnsureStyledTo(LineStart(lineMaxSubord + 2));
446 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
447 break;
448 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
449 break;
450 lineMaxSubord++;
452 if (lineMaxSubord > lineParent) {
453 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
454 // Have chewed up some whitespace that belongs to a parent so seek back
455 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
456 lineMaxSubord--;
460 return lineMaxSubord;
463 int Document::GetFoldParent(int line) const {
464 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
465 int lineLook = line - 1;
466 while ((lineLook > 0) && (
467 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
468 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
470 lineLook--;
472 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
473 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
474 return lineLook;
475 } else {
476 return -1;
480 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
481 int level = GetLevel(line);
482 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
484 int lookLine = line;
485 int lookLineLevel = level;
486 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
487 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
488 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
489 lookLineLevel = GetLevel(--lookLine);
490 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
493 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
494 if (beginFoldBlock == -1) {
495 highlightDelimiter.Clear();
496 return;
499 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
500 int firstChangeableLineBefore = -1;
501 if (endFoldBlock < line) {
502 lookLine = beginFoldBlock - 1;
503 lookLineLevel = GetLevel(lookLine);
504 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
505 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
506 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
507 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
508 beginFoldBlock = lookLine;
509 endFoldBlock = line;
510 firstChangeableLineBefore = line - 1;
513 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
514 break;
515 lookLineLevel = GetLevel(--lookLine);
516 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
519 if (firstChangeableLineBefore == -1) {
520 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
521 lookLine >= beginFoldBlock;
522 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
523 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
524 firstChangeableLineBefore = lookLine;
525 break;
529 if (firstChangeableLineBefore == -1)
530 firstChangeableLineBefore = beginFoldBlock - 1;
532 int firstChangeableLineAfter = -1;
533 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
534 lookLine <= endFoldBlock;
535 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
536 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
537 firstChangeableLineAfter = lookLine;
538 break;
541 if (firstChangeableLineAfter == -1)
542 firstChangeableLineAfter = endFoldBlock + 1;
544 highlightDelimiter.beginFoldBlock = beginFoldBlock;
545 highlightDelimiter.endFoldBlock = endFoldBlock;
546 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
547 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
550 int Document::ClampPositionIntoDocument(int pos) const {
551 return Platform::Clamp(pos, 0, Length());
554 bool Document::IsCrLf(int pos) const {
555 if (pos < 0)
556 return false;
557 if (pos >= (Length() - 1))
558 return false;
559 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
562 int Document::LenChar(int pos) {
563 if (pos < 0) {
564 return 1;
565 } else if (IsCrLf(pos)) {
566 return 2;
567 } else if (SC_CP_UTF8 == dbcsCodePage) {
568 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
569 const int widthCharBytes = UTF8BytesOfLead[leadByte];
570 int lengthDoc = Length();
571 if ((pos + widthCharBytes) > lengthDoc)
572 return lengthDoc - pos;
573 else
574 return widthCharBytes;
575 } else if (dbcsCodePage) {
576 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
577 } else {
578 return 1;
582 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
583 int trail = pos;
584 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
585 trail--;
586 start = (trail > 0) ? trail-1 : trail;
588 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
589 const int widthCharBytes = UTF8BytesOfLead[leadByte];
590 if (widthCharBytes == 1) {
591 return false;
592 } else {
593 int trailBytes = widthCharBytes - 1;
594 int len = pos - start;
595 if (len > trailBytes)
596 // pos too far from lead
597 return false;
598 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
599 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
600 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
601 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
602 if (utf8status & UTF8MaskInvalid)
603 return false;
604 end = start + widthCharBytes;
605 return true;
609 // Normalise a position so that it is not halfway through a two byte character.
610 // This can occur in two situations -
611 // When lines are terminated with \r\n pairs which should be treated as one character.
612 // When displaying DBCS text such as Japanese.
613 // If moving, move the position in the indicated direction.
614 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
615 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
616 // If out of range, just return minimum/maximum value.
617 if (pos <= 0)
618 return 0;
619 if (pos >= Length())
620 return Length();
622 // PLATFORM_ASSERT(pos > 0 && pos < Length());
623 if (checkLineEnd && IsCrLf(pos - 1)) {
624 if (moveDir > 0)
625 return pos + 1;
626 else
627 return pos - 1;
630 if (dbcsCodePage) {
631 if (SC_CP_UTF8 == dbcsCodePage) {
632 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
633 // If ch is not a trail byte then pos is valid intercharacter position
634 if (UTF8IsTrailByte(ch)) {
635 int startUTF = pos;
636 int endUTF = pos;
637 if (InGoodUTF8(pos, startUTF, endUTF)) {
638 // ch is a trail byte within a UTF-8 character
639 if (moveDir > 0)
640 pos = endUTF;
641 else
642 pos = startUTF;
644 // Else invalid UTF-8 so return position of isolated trail byte
646 } else {
647 // Anchor DBCS calculations at start of line because start of line can
648 // not be a DBCS trail byte.
649 int posStartLine = LineStart(LineFromPosition(pos));
650 if (pos == posStartLine)
651 return pos;
653 // Step back until a non-lead-byte is found.
654 int posCheck = pos;
655 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
656 posCheck--;
658 // Check from known start of character.
659 while (posCheck < pos) {
660 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
661 if (posCheck + mbsize == pos) {
662 return pos;
663 } else if (posCheck + mbsize > pos) {
664 if (moveDir > 0) {
665 return posCheck + mbsize;
666 } else {
667 return posCheck;
670 posCheck += mbsize;
675 return pos;
678 // NextPosition moves between valid positions - it can not handle a position in the middle of a
679 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
680 // A \r\n pair is treated as two characters.
681 int Document::NextPosition(int pos, int moveDir) const {
682 // If out of range, just return minimum/maximum value.
683 int increment = (moveDir > 0) ? 1 : -1;
684 if (pos + increment <= 0)
685 return 0;
686 if (pos + increment >= Length())
687 return Length();
689 if (dbcsCodePage) {
690 if (SC_CP_UTF8 == dbcsCodePage) {
691 if (increment == 1) {
692 // Simple forward movement case so can avoid some checks
693 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
694 if (UTF8IsAscii(leadByte)) {
695 // Single byte character or invalid
696 pos++;
697 } else {
698 const int widthCharBytes = UTF8BytesOfLead[leadByte];
699 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
700 for (int b=1; b<widthCharBytes; b++)
701 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
702 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
703 if (utf8status & UTF8MaskInvalid)
704 pos++;
705 else
706 pos += utf8status & UTF8MaskWidth;
708 } else {
709 // Examine byte before position
710 pos--;
711 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
712 // If ch is not a trail byte then pos is valid intercharacter position
713 if (UTF8IsTrailByte(ch)) {
714 // If ch is a trail byte in a valid UTF-8 character then return start of character
715 int startUTF = pos;
716 int endUTF = pos;
717 if (InGoodUTF8(pos, startUTF, endUTF)) {
718 pos = startUTF;
720 // Else invalid UTF-8 so return position of isolated trail byte
723 } else {
724 if (moveDir > 0) {
725 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
726 pos += mbsize;
727 if (pos > Length())
728 pos = Length();
729 } else {
730 // Anchor DBCS calculations at start of line because start of line can
731 // not be a DBCS trail byte.
732 int posStartLine = LineStart(LineFromPosition(pos));
733 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
734 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
735 if ((pos - 1) <= posStartLine) {
736 return pos - 1;
737 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
738 // Must actually be trail byte
739 return pos - 2;
740 } else {
741 // Otherwise, step back until a non-lead-byte is found.
742 int posTemp = pos - 1;
743 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
745 // Now posTemp+1 must point to the beginning of a character,
746 // so figure out whether we went back an even or an odd
747 // number of bytes and go back 1 or 2 bytes, respectively.
748 return (pos - 1 - ((pos - posTemp) & 1));
752 } else {
753 pos += increment;
756 return pos;
759 bool Document::NextCharacter(int &pos, int moveDir) const {
760 // Returns true if pos changed
761 int posNext = NextPosition(pos, moveDir);
762 if (posNext == pos) {
763 return false;
764 } else {
765 pos = posNext;
766 return true;
770 // Return -1 on out-of-bounds
771 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
772 int pos = positionStart;
773 if (dbcsCodePage) {
774 const int increment = (characterOffset > 0) ? 1 : -1;
775 while (characterOffset != 0) {
776 const int posNext = NextPosition(pos, increment);
777 if (posNext == pos)
778 return INVALID_POSITION;
779 pos = posNext;
780 characterOffset -= increment;
782 } else {
783 pos = positionStart + characterOffset;
784 if ((pos < 0) || (pos > Length()))
785 return INVALID_POSITION;
787 return pos;
790 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
791 int character;
792 int bytesInCharacter = 1;
793 if (dbcsCodePage) {
794 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
795 if (SC_CP_UTF8 == dbcsCodePage) {
796 if (UTF8IsAscii(leadByte)) {
797 // Single byte character or invalid
798 character = leadByte;
799 } else {
800 const int widthCharBytes = UTF8BytesOfLead[leadByte];
801 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
802 for (int b=1; b<widthCharBytes; b++)
803 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
804 int utf8status = UTF8Classify(charBytes, widthCharBytes);
805 if (utf8status & UTF8MaskInvalid) {
806 // Report as singleton surrogate values which are invalid Unicode
807 character = 0xDC80 + leadByte;
808 } else {
809 bytesInCharacter = utf8status & UTF8MaskWidth;
810 character = UnicodeFromUTF8(charBytes);
813 } else {
814 if (IsDBCSLeadByte(leadByte)) {
815 bytesInCharacter = 2;
816 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
817 } else {
818 character = leadByte;
821 } else {
822 character = cb.CharAt(position);
824 if (pWidth) {
825 *pWidth = bytesInCharacter;
827 return character;
830 int SCI_METHOD Document::CodePage() const {
831 return dbcsCodePage;
834 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
835 // Byte ranges found in Wikipedia articles with relevant search strings in each case
836 unsigned char uch = static_cast<unsigned char>(ch);
837 switch (dbcsCodePage) {
838 case 932:
839 // Shift_jis
840 return ((uch >= 0x81) && (uch <= 0x9F)) ||
841 ((uch >= 0xE0) && (uch <= 0xFC));
842 // Lead bytes F0 to FC may be a Microsoft addition.
843 case 936:
844 // GBK
845 return (uch >= 0x81) && (uch <= 0xFE);
846 case 949:
847 // Korean Wansung KS C-5601-1987
848 return (uch >= 0x81) && (uch <= 0xFE);
849 case 950:
850 // Big5
851 return (uch >= 0x81) && (uch <= 0xFE);
852 case 1361:
853 // Korean Johab KS C-5601-1992
854 return
855 ((uch >= 0x84) && (uch <= 0xD3)) ||
856 ((uch >= 0xD8) && (uch <= 0xDE)) ||
857 ((uch >= 0xE0) && (uch <= 0xF9));
859 return false;
/** Report whether @a ch is a space or a horizontal tab. */
static inline bool IsSpaceOrTab(int ch) {
	return ch == ' ' || ch == '\t';
}
866 // Need to break text into segments near lengthSegment but taking into
867 // account the encoding to not break inside a UTF-8 or DBCS character
868 // and also trying to avoid breaking inside a pair of combining characters.
869 // The segment length must always be long enough (more than 4 bytes)
870 // so that there will be at least one whole character to make a segment.
871 // For UTF-8, text must consist only of valid whole characters.
872 // In preference order from best to worst:
873 // 1) Break after space
874 // 2) Break before punctuation
875 // 3) Break after whole character
877 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
878 if (length <= lengthSegment)
879 return length;
880 int lastSpaceBreak = -1;
881 int lastPunctuationBreak = -1;
882 int lastEncodingAllowedBreak = 0;
883 for (int j=0; j < lengthSegment;) {
884 unsigned char ch = static_cast<unsigned char>(text[j]);
885 if (j > 0) {
886 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
887 lastSpaceBreak = j;
889 if (ch < 'A') {
890 lastPunctuationBreak = j;
893 lastEncodingAllowedBreak = j;
895 if (dbcsCodePage == SC_CP_UTF8) {
896 j += UTF8BytesOfLead[ch];
897 } else if (dbcsCodePage) {
898 j += IsDBCSLeadByte(ch) ? 2 : 1;
899 } else {
900 j++;
903 if (lastSpaceBreak >= 0) {
904 return lastSpaceBreak;
905 } else if (lastPunctuationBreak >= 0) {
906 return lastPunctuationBreak;
908 return lastEncodingAllowedBreak;
911 EncodingFamily Document::CodePageFamily() const {
912 if (SC_CP_UTF8 == dbcsCodePage)
913 return efUnicode;
914 else if (dbcsCodePage)
915 return efDBCS;
916 else
917 return efEightBit;
920 void Document::ModifiedAt(int pos) {
921 if (endStyled > pos)
922 endStyled = pos;
925 void Document::CheckReadOnly() {
926 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
927 enteredReadOnlyCount++;
928 NotifyModifyAttempt();
929 enteredReadOnlyCount--;
933 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
934 // SetStyleAt does not change the persistent state of a document
936 bool Document::DeleteChars(int pos, int len) {
937 if (pos < 0)
938 return false;
939 if (len <= 0)
940 return false;
941 if ((pos + len) > Length())
942 return false;
943 CheckReadOnly();
944 if (enteredModification != 0) {
945 return false;
946 } else {
947 enteredModification++;
948 if (!cb.IsReadOnly()) {
949 NotifyModified(
950 DocModification(
951 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
952 pos, len,
953 0, 0));
954 int prevLinesTotal = LinesTotal();
955 bool startSavePoint = cb.IsSavePoint();
956 bool startSequence = false;
957 const char *text = cb.DeleteChars(pos, len, startSequence);
958 if (startSavePoint && cb.IsCollectingUndo())
959 NotifySavePoint(!startSavePoint);
960 if ((pos < Length()) || (pos == 0))
961 ModifiedAt(pos);
962 else
963 ModifiedAt(pos-1);
964 NotifyModified(
965 DocModification(
966 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
967 pos, len,
968 LinesTotal() - prevLinesTotal, text));
970 enteredModification--;
972 return !cb.IsReadOnly();
976 * Insert a string with a length.
978 int Document::InsertString(int position, const char *s, int insertLength) {
979 if (insertLength <= 0) {
980 return 0;
982 CheckReadOnly(); // Application may change read only state here
983 if (cb.IsReadOnly()) {
984 return 0;
986 if (enteredModification != 0) {
987 return 0;
989 enteredModification++;
990 insertionSet = false;
991 insertion.clear();
992 NotifyModified(
993 DocModification(
994 SC_MOD_INSERTCHECK,
995 position, insertLength,
996 0, s));
997 if (insertionSet) {
998 s = insertion.c_str();
999 insertLength = static_cast<int>(insertion.length());
1001 NotifyModified(
1002 DocModification(
1003 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1004 position, insertLength,
1005 0, s));
1006 int prevLinesTotal = LinesTotal();
1007 bool startSavePoint = cb.IsSavePoint();
1008 bool startSequence = false;
1009 const char *text = cb.InsertString(position, s, insertLength, startSequence);
1010 if (startSavePoint && cb.IsCollectingUndo())
1011 NotifySavePoint(!startSavePoint);
1012 ModifiedAt(position);
1013 NotifyModified(
1014 DocModification(
1015 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1016 position, insertLength,
1017 LinesTotal() - prevLinesTotal, text));
1018 if (insertionSet) { // Free memory as could be large
1019 std::string().swap(insertion);
1021 enteredModification--;
1022 return insertLength;
1025 void Document::ChangeInsertion(const char *s, int length) {
1026 insertionSet = true;
1027 insertion.assign(s, length);
1030 int SCI_METHOD Document::AddData(char *data, int length) {
1031 try {
1032 int position = Length();
1033 InsertString(position, data, length);
1034 } catch (std::bad_alloc &) {
1035 return SC_STATUS_BADALLOC;
1036 } catch (...) {
1037 return SC_STATUS_FAILURE;
1039 return 0;
1042 void * SCI_METHOD Document::ConvertToDocument() {
1043 return this;
// Undo the next group of steps reported by cb.StartUndo().
// For every step a "before" notification is sent, the step is performed with
// cb.PerformUndoStep(), and a second notification carrying the step's position,
// length, line delta and data is sent with flags describing what happened.
// Nothing is performed when a modification is already in progress
// (enteredModification != 0), when undo collection is off, or when the buffer is
// read-only. A save point notification is sent if the save point state changed.
// Returns newPos: -1 when no non-container step ran, otherwise the position derived
// from the last non-container step (after the re-inserted text for removals).
1046 int Document::Undo() {
1047 int newPos = -1;
1048 CheckReadOnly();
1049 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1050 enteredModification++;
1051 if (!cb.IsReadOnly()) {
1052 bool startSavePoint = cb.IsSavePoint();
1053 bool multiLine = false;
1054 int steps = cb.StartUndo();
1055 //Platform::DebugPrintf("Steps=%d\n", steps);
// State used to treat a run of adjacent removals as one re-inserted range when
// computing newPos. It is reset by insert actions and by non-coalescing
// container actions.
1056 int coalescedRemovePos = -1;
1057 int coalescedRemoveLen = 0;
1058 int prevRemoveActionPos = -1;
1059 int prevRemoveActionLen = 0;
1060 for (int step = 0; step < steps; step++) {
1061 const int prevLinesTotal = LinesTotal();
1062 const Action &action = cb.GetUndoStep();
// Undoing a removal puts text back, so the "before" notification is an insertion.
1063 if (action.at == removeAction) {
1064 NotifyModified(DocModification(
1065 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1066 } else if (action.at == containerAction) {
// Container actions carry a token in action.position rather than a document position.
1067 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1068 dm.token = action.position;
1069 NotifyModified(dm);
1070 if (!action.mayCoalesce) {
1071 coalescedRemovePos = -1;
1072 coalescedRemoveLen = 0;
1073 prevRemoveActionPos = -1;
1074 prevRemoveActionLen = 0;
1076 } else {
1077 NotifyModified(DocModification(
1078 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1080 cb.PerformUndoStep();
1081 if (action.at != containerAction) {
1082 ModifiedAt(action.position);
1083 newPos = action.position;
1086 int modFlags = SC_PERFORMED_UNDO;
1087 // With undo, an insertion action becomes a deletion notification
1088 if (action.at == removeAction) {
1089 newPos += action.lenData;
1090 modFlags |= SC_MOD_INSERTTEXT;
// A removal at the same position as the previous one, or directly after it,
// extends the coalesced range; newPos is then placed at the end of that range.
1091 if ((coalescedRemoveLen > 0) &&
1092 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1093 coalescedRemoveLen += action.lenData;
1094 newPos = coalescedRemovePos + coalescedRemoveLen;
1095 } else {
1096 coalescedRemovePos = action.position;
1097 coalescedRemoveLen = action.lenData;
1099 prevRemoveActionPos = action.position;
1100 prevRemoveActionLen = action.lenData;
1101 } else if (action.at == insertAction) {
1102 modFlags |= SC_MOD_DELETETEXT;
1103 coalescedRemovePos = -1;
1104 coalescedRemoveLen = 0;
1105 prevRemoveActionPos = -1;
1106 prevRemoveActionLen = 0;
1108 if (steps > 1)
1109 modFlags |= SC_MULTISTEPUNDOREDO;
1110 const int linesAdded = LinesTotal() - prevLinesTotal;
1111 if (linesAdded != 0)
1112 multiLine = true;
// Only the final step is flagged as last, and as multi-line if any step changed the line count.
1113 if (step == steps - 1) {
1114 modFlags |= SC_LASTSTEPINUNDOREDO;
1115 if (multiLine)
1116 modFlags |= SC_MULTILINEUNDOREDO;
1118 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1119 linesAdded, action.data));
1122 bool endSavePoint = cb.IsSavePoint();
1123 if (startSavePoint != endSavePoint)
1124 NotifySavePoint(endSavePoint);
1126 enteredModification--;
1128 return newPos;
// Redo the next group of steps reported by cb.StartRedo().
// For every step a "before" notification is sent, the step is performed with
// cb.PerformRedoStep(), and a second notification carrying the step's position,
// length, line delta and data is sent with flags describing what happened.
// Nothing is performed when a modification is already in progress
// (enteredModification != 0), when undo collection is off, or when the buffer is
// read-only. A save point notification is sent if the save point state changed.
// Returns newPos: -1 when no non-container step ran, otherwise the position of the
// last non-container step, advanced past the text for re-done insertions.
1131 int Document::Redo() {
1132 int newPos = -1;
1133 CheckReadOnly();
1134 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1135 enteredModification++;
1136 if (!cb.IsReadOnly()) {
1137 bool startSavePoint = cb.IsSavePoint();
1138 bool multiLine = false;
1139 int steps = cb.StartRedo();
1140 for (int step = 0; step < steps; step++) {
1141 const int prevLinesTotal = LinesTotal();
1142 const Action &action = cb.GetRedoStep();
1143 if (action.at == insertAction) {
1144 NotifyModified(DocModification(
1145 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1146 } else if (action.at == containerAction) {
// Container actions carry a token in action.position rather than a document position.
1147 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1148 dm.token = action.position;
1149 NotifyModified(dm);
1150 } else {
1151 NotifyModified(DocModification(
1152 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1154 cb.PerformRedoStep();
1155 if (action.at != containerAction) {
1156 ModifiedAt(action.position);
1157 newPos = action.position;
1160 int modFlags = SC_PERFORMED_REDO;
1161 if (action.at == insertAction) {
1162 newPos += action.lenData;
1163 modFlags |= SC_MOD_INSERTTEXT;
1164 } else if (action.at == removeAction) {
1165 modFlags |= SC_MOD_DELETETEXT;
1167 if (steps > 1)
1168 modFlags |= SC_MULTISTEPUNDOREDO;
1169 const int linesAdded = LinesTotal() - prevLinesTotal;
1170 if (linesAdded != 0)
1171 multiLine = true;
// Only the final step is flagged as last, and as multi-line if any step changed the line count.
1172 if (step == steps - 1) {
1173 modFlags |= SC_LASTSTEPINUNDOREDO;
1174 if (multiLine)
1175 modFlags |= SC_MULTILINEUNDOREDO;
1177 NotifyModified(
1178 DocModification(modFlags, action.position, action.lenData,
1179 linesAdded, action.data));
1182 bool endSavePoint = cb.IsSavePoint();
1183 if (startSavePoint != endSavePoint)
1184 NotifySavePoint(endSavePoint);
1186 enteredModification--;
1188 return newPos;
1191 void Document::DelChar(int pos) {
1192 DeleteChars(pos, LenChar(pos));
1195 void Document::DelCharBack(int pos) {
1196 if (pos <= 0) {
1197 return;
1198 } else if (IsCrLf(pos - 2)) {
1199 DeleteChars(pos - 2, 2);
1200 } else if (dbcsCodePage) {
1201 int startChar = NextPosition(pos, -1);
1202 DeleteChars(startChar, pos - startChar);
1203 } else {
1204 DeleteChars(pos - 1, 1);
/**
 * Return the first tab stop strictly after column @a pos for tab stops
 * placed every @a tabSize columns. @a tabSize must not be zero.
 */
static int NextTab(int pos, int tabSize) {
	const int intoCurrentStop = pos % tabSize;
	return pos - intoCurrentStop + tabSize;
}
/**
 * Build the white space needed to indent by @a indent columns.
 * When @a insertSpaces is false as many tabs as fit are used, each covering
 * @a tabSize columns, and the remainder is made up with spaces; otherwise only
 * spaces are used.
 * A non-positive @a indent yields an empty string. A non-positive @a tabSize
 * cannot be expressed with tabs, so spaces are used instead; the previous loop
 * never terminated in that case because subtracting the tab size made no progress.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		const int tabs = indent / tabSize;
		indentation.append(static_cast<size_t>(tabs), '\t');
		indent -= tabs * tabSize;
	}
	indentation.append(static_cast<size_t>(indent), ' ');
	return indentation;
}
1227 int SCI_METHOD Document::GetLineIndentation(int line) {
1228 int indent = 0;
1229 if ((line >= 0) && (line < LinesTotal())) {
1230 int lineStart = LineStart(line);
1231 int length = Length();
1232 for (int i = lineStart; i < length; i++) {
1233 char ch = cb.CharAt(i);
1234 if (ch == ' ')
1235 indent++;
1236 else if (ch == '\t')
1237 indent = NextTab(indent, tabInChars);
1238 else
1239 return indent;
1242 return indent;
1245 int Document::SetLineIndentation(int line, int indent) {
1246 int indentOfLine = GetLineIndentation(line);
1247 if (indent < 0)
1248 indent = 0;
1249 if (indent != indentOfLine) {
1250 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1251 int thisLineStart = LineStart(line);
1252 int indentPos = GetLineIndentPosition(line);
1253 UndoGroup ug(this);
1254 DeleteChars(thisLineStart, indentPos - thisLineStart);
1255 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1256 static_cast<int>(linebuf.length()));
1257 } else {
1258 return GetLineIndentPosition(line);
1262 int Document::GetLineIndentPosition(int line) const {
1263 if (line < 0)
1264 return 0;
1265 int pos = LineStart(line);
1266 int length = Length();
1267 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1268 pos++;
1270 return pos;
1273 int Document::GetColumn(int pos) {
1274 int column = 0;
1275 int line = LineFromPosition(pos);
1276 if ((line >= 0) && (line < LinesTotal())) {
1277 for (int i = LineStart(line); i < pos;) {
1278 char ch = cb.CharAt(i);
1279 if (ch == '\t') {
1280 column = NextTab(column, tabInChars);
1281 i++;
1282 } else if (ch == '\r') {
1283 return column;
1284 } else if (ch == '\n') {
1285 return column;
1286 } else if (i >= Length()) {
1287 return column;
1288 } else {
1289 column++;
1290 i = NextPosition(i, 1);
1294 return column;
1297 int Document::CountCharacters(int startPos, int endPos) const {
1298 startPos = MovePositionOutsideChar(startPos, 1, false);
1299 endPos = MovePositionOutsideChar(endPos, -1, false);
1300 int count = 0;
1301 int i = startPos;
1302 while (i < endPos) {
1303 count++;
1304 if (IsCrLf(i))
1305 i++;
1306 i = NextPosition(i, 1);
1308 return count;
1311 int Document::FindColumn(int line, int column) {
1312 int position = LineStart(line);
1313 if ((line >= 0) && (line < LinesTotal())) {
1314 int columnCurrent = 0;
1315 while ((columnCurrent < column) && (position < Length())) {
1316 char ch = cb.CharAt(position);
1317 if (ch == '\t') {
1318 columnCurrent = NextTab(columnCurrent, tabInChars);
1319 if (columnCurrent > column)
1320 return position;
1321 position++;
1322 } else if (ch == '\r') {
1323 return position;
1324 } else if (ch == '\n') {
1325 return position;
1326 } else {
1327 columnCurrent++;
1328 position = NextPosition(position, 1);
1332 return position;
1335 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1336 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1337 for (int line = lineBottom; line >= lineTop; line--) {
1338 int indentOfLine = GetLineIndentation(line);
1339 if (forwards) {
1340 if (LineStart(line) < LineEnd(line)) {
1341 SetLineIndentation(line, indentOfLine + IndentSize());
1343 } else {
1344 SetLineIndentation(line, indentOfLine - IndentSize());
1349 // Convert line endings for a piece of text to a particular mode.
1350 // Stop at len or when a NUL is found.
1351 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1352 std::string dest;
1353 for (size_t i = 0; (i < len) && (s[i]); i++) {
1354 if (s[i] == '\n' || s[i] == '\r') {
1355 if (eolModeWanted == SC_EOL_CR) {
1356 dest.push_back('\r');
1357 } else if (eolModeWanted == SC_EOL_LF) {
1358 dest.push_back('\n');
1359 } else { // eolModeWanted == SC_EOL_CRLF
1360 dest.push_back('\r');
1361 dest.push_back('\n');
1363 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1364 i++;
1366 } else {
1367 dest.push_back(s[i]);
1370 return dest;
// Convert every line end in the document to the form selected by eolModeSet
// (SC_EOL_CR, SC_EOL_LF or SC_EOL_CRLF) by inserting and deleting single
// characters while scanning forwards. An UndoGroup is held for the duration
// (presumably so the edits undo as one action - confirm against UndoGroup).
// pos is adjusted after each edit so that the loop's pos++ lands on the character
// following the converted line end.
1373 void Document::ConvertLineEnds(int eolModeSet) {
1374 UndoGroup ug(this);
1376 for (int pos = 0; pos < Length(); pos++) {
1377 if (cb.CharAt(pos) == '\r') {
1378 if (cb.CharAt(pos + 1) == '\n') {
1379 // CRLF
1380 if (eolModeSet == SC_EOL_CR) {
1381 DeleteChars(pos + 1, 1); // Delete the LF
1382 } else if (eolModeSet == SC_EOL_LF) {
1383 DeleteChars(pos, 1); // Delete the CR
1384 } else {
// Already in the wanted form: step over the CR so the LF is not seen as a lone LF.
1385 pos++;
1387 } else {
1388 // CR
1389 if (eolModeSet == SC_EOL_CRLF) {
1390 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1391 } else if (eolModeSet == SC_EOL_LF) {
// Insert the replacement first, then remove the old character that now follows it.
1392 pos += InsertString(pos, "\n", 1); // Insert LF
1393 DeleteChars(pos, 1); // Delete CR
1394 pos--;
1397 } else if (cb.CharAt(pos) == '\n') {
1398 // LF
1399 if (eolModeSet == SC_EOL_CRLF) {
1400 pos += InsertString(pos, "\r", 1); // Insert CR
1401 } else if (eolModeSet == SC_EOL_CR) {
1402 pos += InsertString(pos, "\r", 1); // Insert CR
1403 DeleteChars(pos, 1); // Delete LF
1404 pos--;
1411 bool Document::IsWhiteLine(int line) const {
1412 int currentChar = LineStart(line);
1413 int endLine = LineEnd(line);
1414 while (currentChar < endLine) {
1415 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1416 return false;
1418 ++currentChar;
1420 return true;
1423 int Document::ParaUp(int pos) const {
1424 int line = LineFromPosition(pos);
1425 line--;
1426 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1427 line--;
1429 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1430 line--;
1432 line++;
1433 return LineStart(line);
1436 int Document::ParaDown(int pos) const {
1437 int line = LineFromPosition(pos);
1438 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1439 line++;
1441 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1442 line++;
1444 if (line < LinesTotal())
1445 return LineStart(line);
1446 else // end of a document
1447 return LineEnd(line-1);
1450 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1451 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1452 return CharClassify::ccWord;
1453 return charClass.GetClass(ch);
1457 * Used by commmands that want to select whole words.
1458 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1460 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1461 CharClassify::cc ccStart = CharClassify::ccWord;
1462 if (delta < 0) {
1463 if (!onlyWordCharacters)
1464 ccStart = WordCharClass(cb.CharAt(pos-1));
1465 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1466 pos--;
1467 } else {
1468 if (!onlyWordCharacters && pos < Length())
1469 ccStart = WordCharClass(cb.CharAt(pos));
1470 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1471 pos++;
1473 return MovePositionOutsideChar(pos, delta, true);
1477 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1478 * (delta < 0).
1479 * This is looking for a transition between character classes although there is also some
1480 * additional movement to transit white space.
1481 * Used by cursor movement by word commands.
1483 int Document::NextWordStart(int pos, int delta) {
1484 if (delta < 0) {
1485 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1486 pos--;
1487 if (pos > 0) {
1488 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1489 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1490 pos--;
1493 } else {
1494 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1495 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1496 pos++;
1497 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1498 pos++;
1500 return pos;
1504 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1505 * (delta < 0).
1506 * This is looking for a transition between character classes although there is also some
1507 * additional movement to transit white space.
1508 * Used by cursor movement by word commands.
1510 int Document::NextWordEnd(int pos, int delta) {
1511 if (delta < 0) {
1512 if (pos > 0) {
1513 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1514 if (ccStart != CharClassify::ccSpace) {
1515 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1516 pos--;
1519 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1520 pos--;
1523 } else {
1524 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1525 pos++;
1527 if (pos < Length()) {
1528 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1529 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1530 pos++;
1534 return pos;
1538 * Check that the character at the given position is a word or punctuation character and that
1539 * the previous character is of a different character class.
1541 bool Document::IsWordStartAt(int pos) const {
1542 if (pos > 0) {
1543 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1544 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1545 (ccPos != WordCharClass(CharAt(pos - 1)));
1547 return true;
1551 * Check that the character at the given position is a word or punctuation character and that
1552 * the next character is of a different character class.
1554 bool Document::IsWordEndAt(int pos) const {
1555 if (pos < Length()) {
1556 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1557 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1558 (ccPrev != WordCharClass(CharAt(pos)));
1560 return true;
1564 * Check that the given range is has transitions between character classes at both
1565 * ends and where the characters on the inside are word or punctuation characters.
1567 bool Document::IsWordAt(int start, int end) const {
1568 return IsWordStartAt(start) && IsWordEndAt(end);
1571 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1572 return (!word && !wordStart) ||
1573 (word && IsWordAt(pos, pos + length)) ||
1574 (wordStart && IsWordStartAt(pos));
1577 bool Document::HasCaseFolder(void) const {
1578 return pcf != 0;
1581 void Document::SetCaseFolder(CaseFolder *pcf_) {
1582 delete pcf;
1583 pcf = pcf_;
1586 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1587 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1588 if (UTF8IsAscii(leadByte)) {
1589 // Common case: ASCII character
1590 return CharacterExtracted(leadByte, 1);
1592 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1593 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1594 for (int b=1; b<widthCharBytes; b++)
1595 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1596 int utf8status = UTF8Classify(charBytes, widthCharBytes);
1597 if (utf8status & UTF8MaskInvalid) {
1598 // Treat as invalid and use up just one byte
1599 return CharacterExtracted(unicodeReplacementChar, 1);
1600 } else {
1601 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search).
 * Has not been tested with backwards DBCS searches yet.
 */
// Search for the *length bytes at search between minPos and maxPos.
// The search runs forwards when minPos <= maxPos and backwards otherwise.
// Returns minPos immediately when *length <= 0, the position of the match when
// one is found, and -1 when nothing matches. The case-insensitive multi-byte
// paths store the byte length of the matched document text in *length.
// word / wordStart restrict matches through MatchesWordOptions.
// NOTE(review): the case-insensitive paths dereference pcf without a null check -
// presumably callers install a case folder first; confirm against the callers.
1610 long Document::FindText(int minPos, int maxPos, const char *search,
1611 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1612 int *length) {
1613 if (*length <= 0)
1614 return minPos;
1615 if (regExp) {
// The regular expression engine is created on first use and then does the whole search.
1616 if (!regex)
1617 regex = CreateRegexSearch(&charClass);
1618 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1619 } else {
1621 const bool forward = minPos <= maxPos;
1622 const int increment = forward ? 1 : -1;
1624 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1625 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1626 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1628 // Compute actual search ranges needed
1629 const int lengthFind = *length;
1631 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
// limitPos is the higher end of the range in either direction; no match may extend past it.
1632 const int limitPos = Platform::Maximum(startPos, endPos);
1633 int pos = startPos;
1634 if (!forward) {
1635 // Back all of a character
1636 pos = NextPosition(pos, increment);
1638 if (caseSensitive) {
// Case sensitive: compare bytes directly, testing the first byte before the rest.
1639 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1640 const char charStartSearch = search[0];
1641 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1642 if (CharAt(pos) == charStartSearch) {
1643 bool found = (pos + lengthFind) <= limitPos;
1644 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1645 found = CharAt(pos + indexSearch) == search[indexSearch];
1647 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1648 return pos;
1651 if (!NextCharacter(pos, increment))
1652 break;
1654 } else if (SC_CP_UTF8 == dbcsCodePage) {
// Case insensitive UTF-8: fold the search text once, then fold the document one
// character at a time and compare the folded bytes.
1655 const size_t maxFoldingExpansion = 4;
1656 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1657 const int lenSearch = static_cast<int>(
1658 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1659 char bytes[UTF8MaxBytes + 1];
1660 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1661 while (forward ? (pos < endPos) : (pos >= endPos)) {
1662 int widthFirstCharacter = 0;
1663 int posIndexDocument = pos;
1664 int indexSearch = 0;
1665 bool characterMatches = true;
1666 for (;;) {
1667 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1668 bytes[0] = leadByte;
1669 int widthChar = 1;
1670 if (!UTF8IsAscii(leadByte)) {
1671 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1672 for (int b=1; b<widthCharBytes; b++) {
1673 bytes[b] = cb.CharAt(posIndexDocument+b);
1675 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
// Remember the width of the first character so a forward search can step over it.
1677 if (!widthFirstCharacter)
1678 widthFirstCharacter = widthChar;
1679 if ((posIndexDocument + widthChar) > limitPos)
1680 break;
1681 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1682 folded[lenFlat] = 0;
1683 // Does folded match the buffer
1684 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1685 if (!characterMatches)
1686 break;
1687 posIndexDocument += widthChar;
1688 indexSearch += lenFlat;
1689 if (indexSearch >= lenSearch)
1690 break;
1692 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1693 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
// The matched document text may differ in byte length from the search text.
1694 *length = posIndexDocument - pos;
1695 return pos;
1698 if (forward) {
1699 pos += widthFirstCharacter;
1700 } else {
1701 if (!NextCharacter(pos, increment))
1702 break;
1705 } else if (dbcsCodePage) {
// Case insensitive DBCS: as for UTF-8, with characters one or two bytes wide
// as decided by IsDBCSLeadByte.
1706 const size_t maxBytesCharacter = 2;
1707 const size_t maxFoldingExpansion = 4;
1708 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1709 const int lenSearch = static_cast<int>(
1710 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1711 while (forward ? (pos < endPos) : (pos >= endPos)) {
1712 int indexDocument = 0;
1713 int indexSearch = 0;
1714 bool characterMatches = true;
1715 while (characterMatches &&
1716 ((pos + indexDocument) < limitPos) &&
1717 (indexSearch < lenSearch)) {
1718 char bytes[maxBytesCharacter + 1];
1719 bytes[0] = cb.CharAt(pos + indexDocument);
1720 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1721 if (widthChar == 2)
1722 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1723 if ((pos + indexDocument + widthChar) > limitPos)
1724 break;
1725 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1726 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1727 folded[lenFlat] = 0;
1728 // Does folded match the buffer
1729 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1730 indexDocument += widthChar;
1731 indexSearch += lenFlat;
1733 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1734 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1735 *length = indexDocument;
1736 return pos;
1739 if (!NextCharacter(pos, increment))
1740 break;
1742 } else {
// Case insensitive single byte: fold the search text once and each document byte as it is compared.
1743 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1744 std::vector<char> searchThing(lengthFind + 1);
1745 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1746 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1747 bool found = (pos + lengthFind) <= limitPos;
1748 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1749 char ch = CharAt(pos + indexSearch);
1750 char folded[2];
1751 pcf->Fold(folded, sizeof(folded), &ch, 1);
1752 found = folded[0] == searchThing[indexSearch];
1754 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1755 return pos;
1757 if (!NextCharacter(pos, increment))
1758 break;
1762 //Platform::DebugPrintf("Not found\n");
1763 return -1;
1766 const char *Document::SubstituteByPosition(const char *text, int *length) {
1767 if (regex)
1768 return regex->SubstituteByPosition(this, text, length);
1769 else
1770 return 0;
1773 int Document::LinesTotal() const {
1774 return cb.Lines();
1777 void Document::SetDefaultCharClasses(bool includeWordClass) {
1778 charClass.SetDefaultCharClasses(includeWordClass);
1781 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1782 charClass.SetCharClasses(chars, newCharClass);
1785 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1786 return charClass.GetCharsOfClass(characterClass, buffer);
1789 void SCI_METHOD Document::StartStyling(int position, char) {
1790 endStyled = position;
1793 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1794 if (enteredStyling != 0) {
1795 return false;
1796 } else {
1797 enteredStyling++;
1798 int prevEndStyled = endStyled;
1799 if (cb.SetStyleFor(endStyled, length, style)) {
1800 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1801 prevEndStyled, length);
1802 NotifyModified(mh);
1804 endStyled += length;
1805 enteredStyling--;
1806 return true;
1810 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1811 if (enteredStyling != 0) {
1812 return false;
1813 } else {
1814 enteredStyling++;
1815 bool didChange = false;
1816 int startMod = 0;
1817 int endMod = 0;
1818 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1819 PLATFORM_ASSERT(endStyled < Length());
1820 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1821 if (!didChange) {
1822 startMod = endStyled;
1824 didChange = true;
1825 endMod = endStyled;
1828 if (didChange) {
1829 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1830 startMod, endMod - startMod + 1);
1831 NotifyModified(mh);
1833 enteredStyling--;
1834 return true;
1838 void Document::EnsureStyledTo(int pos) {
1839 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1840 IncrementStyleClock();
1841 if (pli && !pli->UseContainerLexing()) {
1842 int lineEndStyled = LineFromPosition(GetEndStyled());
1843 int endStyledTo = LineStart(lineEndStyled);
1844 pli->Colourise(endStyledTo, pos);
1845 } else {
1846 // Ask the watchers to style, and stop as soon as one responds.
1847 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1848 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1849 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1855 void Document::LexerChanged() {
1856 // Tell the watchers the lexer has changed.
1857 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1858 it->watcher->NotifyLexerChanged(this, it->userData);
1862 int SCI_METHOD Document::SetLineState(int line, int state) {
1863 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1864 if (state != statePrevious) {
1865 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1866 NotifyModified(mh);
1868 return statePrevious;
1871 int SCI_METHOD Document::GetLineState(int line) const {
1872 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1875 int Document::GetMaxLineState() {
1876 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1879 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1880 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1881 NotifyModified(mh);
1884 StyledText Document::MarginStyledText(int line) const {
1885 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1886 return StyledText(pla->Length(line), pla->Text(line),
1887 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1890 void Document::MarginSetText(int line, const char *text) {
1891 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1892 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1893 NotifyModified(mh);
1896 void Document::MarginSetStyle(int line, int style) {
1897 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1898 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1901 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1902 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1903 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1906 void Document::MarginClearAll() {
1907 int maxEditorLine = LinesTotal();
1908 for (int l=0; l<maxEditorLine; l++)
1909 MarginSetText(l, 0);
1910 // Free remaining data
1911 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1914 StyledText Document::AnnotationStyledText(int line) const {
1915 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1916 return StyledText(pla->Length(line), pla->Text(line),
1917 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1920 void Document::AnnotationSetText(int line, const char *text) {
1921 if (line >= 0 && line < LinesTotal()) {
1922 const int linesBefore = AnnotationLines(line);
1923 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1924 const int linesAfter = AnnotationLines(line);
1925 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1926 mh.annotationLinesAdded = linesAfter - linesBefore;
1927 NotifyModified(mh);
1931 void Document::AnnotationSetStyle(int line, int style) {
1932 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1933 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1934 NotifyModified(mh);
1937 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1938 if (line >= 0 && line < LinesTotal()) {
1939 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1943 int Document::AnnotationLines(int line) const {
1944 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1947 void Document::AnnotationClearAll() {
1948 int maxEditorLine = LinesTotal();
1949 for (int l=0; l<maxEditorLine; l++)
1950 AnnotationSetText(l, 0);
1951 // Free remaining data
1952 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1955 void Document::IncrementStyleClock() {
1956 styleClock = (styleClock + 1) % 0x100000;
1959 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1960 if (decorations.FillRange(position, value, fillLength)) {
1961 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1962 position, fillLength);
1963 NotifyModified(mh);
1967 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1968 WatcherWithUserData wwud(watcher, userData);
1969 std::vector<WatcherWithUserData>::iterator it =
1970 std::find(watchers.begin(), watchers.end(), wwud);
1971 if (it != watchers.end())
1972 return false;
1973 watchers.push_back(wwud);
1974 return true;
1977 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1978 std::vector<WatcherWithUserData>::iterator it =
1979 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1980 if (it != watchers.end()) {
1981 watchers.erase(it);
1982 return true;
1984 return false;
1987 void Document::NotifyModifyAttempt() {
1988 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1989 it->watcher->NotifyModifyAttempt(this, it->userData);
1993 void Document::NotifySavePoint(bool atSavePoint) {
1994 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1995 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1999 void Document::NotifyModified(DocModification mh) {
2000 if (mh.modificationType & SC_MOD_INSERTTEXT) {
2001 decorations.InsertSpace(mh.position, mh.length);
2002 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2003 decorations.DeleteRange(mh.position, mh.length);
2005 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2006 it->watcher->NotifyModified(this, mh, it->userData);
2010 bool Document::IsWordPartSeparator(char ch) const {
2011 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
// Move left from pos to the start of the previous word part and return that position.
// Word part separators directly before pos are skipped first. The character then
// reached selects which run is crossed: lower case letters, upper case letters,
// digits, punctuation, white space or non-ASCII bytes.
// Returns pos unchanged when pos <= 0.
2014 int Document::WordPartLeft(int pos) {
2015 if (pos > 0) {
2016 --pos;
2017 char startChar = cb.CharAt(pos);
2018 if (IsWordPartSeparator(startChar)) {
2019 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2020 --pos;
2023 if (pos > 0) {
2024 startChar = cb.CharAt(pos);
2025 --pos;
// Each branch walks back over its run and then steps forward again if it stopped on a
// character that does not belong to the run.
2026 if (IsLowerCase(startChar)) {
2027 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2028 --pos;
// An upper case letter immediately before a lower case run is kept as part of it.
2029 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2030 ++pos;
2031 } else if (IsUpperCase(startChar)) {
2032 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2033 --pos;
2034 if (!IsUpperCase(cb.CharAt(pos)))
2035 ++pos;
2036 } else if (IsADigit(startChar)) {
2037 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2038 --pos;
2039 if (!IsADigit(cb.CharAt(pos)))
2040 ++pos;
2041 } else if (IsPunctuation(startChar)) {
2042 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2043 --pos;
2044 if (!IsPunctuation(cb.CharAt(pos)))
2045 ++pos;
2046 } else if (isspacechar(startChar)) {
2047 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2048 --pos;
2049 if (!isspacechar(cb.CharAt(pos)))
2050 ++pos;
2051 } else if (!IsASCII(startChar)) {
2052 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2053 --pos;
2054 if (IsASCII(cb.CharAt(pos)))
2055 ++pos;
2056 } else {
2057 ++pos;
2061 return pos;
// Move right from pos to the end of the current word part and return that position.
// Word part separators at pos are skipped first. The character then reached selects
// which run is crossed: non-ASCII bytes, lower case letters, upper case letters,
// digits, punctuation or white space; any other character is stepped over singly.
2064 int Document::WordPartRight(int pos) {
2065 char startChar = cb.CharAt(pos);
2066 int length = Length();
2067 if (IsWordPartSeparator(startChar)) {
2068 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2069 ++pos;
2070 startChar = cb.CharAt(pos);
2072 if (!IsASCII(startChar)) {
2073 while (pos < length && !IsASCII(cb.CharAt(pos)))
2074 ++pos;
2075 } else if (IsLowerCase(startChar)) {
2076 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2077 ++pos;
2078 } else if (IsUpperCase(startChar)) {
// One upper case letter followed by lower case: cross the letter and the lower case run.
2079 if (IsLowerCase(cb.CharAt(pos + 1))) {
2080 ++pos;
2081 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2082 ++pos;
2083 } else {
2084 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2085 ++pos;
// If the run stopped on a lower case letter, step back one so the upper case letter
// before it is left for the next word part.
2087 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2088 --pos;
2089 } else if (IsADigit(startChar)) {
2090 while (pos < length && IsADigit(cb.CharAt(pos)))
2091 ++pos;
2092 } else if (IsPunctuation(startChar)) {
2093 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2094 ++pos;
2095 } else if (isspacechar(startChar)) {
2096 while (pos < length && isspacechar(cb.CharAt(pos)))
2097 ++pos;
2098 } else {
2099 ++pos;
2101 return pos;
/// Report whether @a c is a carriage return or a line feed.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
2108 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2109 int sStart = cb.StyleAt(pos);
2110 if (delta < 0) {
2111 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2112 pos--;
2113 pos++;
2114 } else {
2115 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2116 pos++;
2118 return pos;
/**
 * Return the brace that pairs with @a ch for the four brace kinds
 * (), [], {} and <>, or '\0' when @a ch is not a brace.
 */
static char BraceOpposite(char ch) {
	// Each opener sits at an even index with its closer right after it, so
	// flipping the lowest bit of the index gives the partner.
	static const char bracePairs[] = "()[]{}<>";
	for (int i = 0; bracePairs[i] != '\0'; i++) {
		if (bracePairs[i] == ch)
			return bracePairs[i ^ 1];
	}
	return '\0';
}
2144 // TODO: should be able to extend styled region to find matching brace
2145 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2146 char chBrace = CharAt(position);
2147 char chSeek = BraceOpposite(chBrace);
2148 if (chSeek == '\0')
2149 return - 1;
2150 char styBrace = static_cast<char>(StyleAt(position));
2151 int direction = -1;
2152 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2153 direction = 1;
2154 int depth = 1;
2155 position = NextPosition(position, direction);
2156 while ((position >= 0) && (position < Length())) {
2157 char chAtPos = CharAt(position);
2158 char styAtPos = static_cast<char>(StyleAt(position));
2159 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2160 if (chAtPos == chBrace)
2161 depth++;
2162 if (chAtPos == chSeek)
2163 depth--;
2164 if (depth == 0)
2165 return position;
2167 int positionBeforeMove = position;
2168 position = NextPosition(position, direction);
2169 if (position == positionBeforeMove)
2170 break;
2172 return - 1;
2176 * Implementation of RegexSearchBase for the default built-in regular expression engine
2178 class BuiltinRegex : public RegexSearchBase {
2179 public:
2180 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2182 virtual ~BuiltinRegex() {
2185 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2186 bool caseSensitive, bool word, bool wordStart, int flags,
2187 int *length);
2189 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2191 private:
2192 RESearch search;
2193 std::string substituted;
2196 namespace {
2199 * RESearchRange keeps track of search range.
2201 class RESearchRange {
2202 public:
2203 const Document *doc;
2204 int increment;
2205 int startPos;
2206 int endPos;
2207 int lineRangeStart;
2208 int lineRangeEnd;
2209 int lineRangeBreak;
2210 RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2211 increment = (minPos <= maxPos) ? 1 : -1;
2213 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2214 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2215 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2217 lineRangeStart = doc->LineFromPosition(startPos);
2218 lineRangeEnd = doc->LineFromPosition(endPos);
2219 if ((increment == 1) &&
2220 (startPos >= doc->LineEnd(lineRangeStart)) &&
2221 (lineRangeStart < lineRangeEnd)) {
2222 // the start position is at end of line or between line end characters.
2223 lineRangeStart++;
2224 startPos = doc->LineStart(lineRangeStart);
2225 } else if ((increment == -1) &&
2226 (startPos <= doc->LineStart(lineRangeStart)) &&
2227 (lineRangeStart > lineRangeEnd)) {
2228 // the start position is at beginning of line.
2229 lineRangeStart--;
2230 startPos = doc->LineEnd(lineRangeStart);
2232 lineRangeBreak = lineRangeEnd + increment;
2234 Range LineRange(int line) const {
2235 Range range(doc->LineStart(line), doc->LineEnd(line));
2236 if (increment == 1) {
2237 if (line == lineRangeStart)
2238 range.start = startPos;
2239 if (line == lineRangeEnd)
2240 range.end = endPos;
2241 } else {
2242 if (line == lineRangeEnd)
2243 range.start = endPos;
2244 if (line == lineRangeStart)
2245 range.end = startPos;
2247 return range;
2251 // Define a way for the Regular Expression code to access the document
2252 class DocumentIndexer : public CharacterIndexer {
2253 Document *pdoc;
2254 int end;
2255 public:
2256 DocumentIndexer(Document *pdoc_, int end_) :
2257 pdoc(pdoc_), end(end_) {
2260 virtual ~DocumentIndexer() {
2263 virtual char CharAt(int index) {
2264 if (index < 0 || index >= end)
2265 return 0;
2266 else
2267 return pdoc->CharAt(index);
2271 #ifdef CXX11_REGEX
2273 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2274 public:
2275 const Document *doc;
2276 Position position;
2277 ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2279 ByteIterator(const ByteIterator &other) {
2280 doc = other.doc;
2281 position = other.position;
2283 ByteIterator &operator=(const ByteIterator &other) {
2284 if (this != &other) {
2285 doc = other.doc;
2286 position = other.position;
2288 return *this;
2290 char operator*() const {
2291 return doc->CharAt(position);
2293 ByteIterator &operator++() {
2294 position++;
2295 return *this;
2297 ByteIterator operator++(int) {
2298 ByteIterator retVal(*this);
2299 position++;
2300 return retVal;
2302 ByteIterator &operator--() {
2303 position--;
2304 return *this;
2306 bool operator==(const ByteIterator &other) const {
2307 return doc == other.doc && position == other.position;
2309 bool operator!=(const ByteIterator &other) const {
2310 return doc != other.doc || position != other.position;
2312 int Pos() const {
2313 return position;
2315 int PosRoundUp() const {
2316 return position;
2320 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2321 // Would be better to use sizeof(wchar_t) or similar to differentiate
2322 // but easier for now to hard-code platforms.
2323 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2324 // appear to allow specializing basic_regex over these.
2326 #ifdef _WIN32
2327 #define WCHAR_T_IS_16 1
2328 #else
2329 #define WCHAR_T_IS_16 0
2330 #endif
2332 #if WCHAR_T_IS_16
2334 // On Windows, report non-BMP characters as 2 separate surrogates as that
2335 // matches wregex since it is based on wchar_t.
2336 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2337 // These 3 fields determine the iterator position and are used for comparisons
2338 const Document *doc;
2339 Position position;
2340 size_t characterIndex;
2341 // Remaining fields are derived from the determining fields so are excluded in comparisons
2342 unsigned int lenBytes;
2343 size_t lenCharacters;
2344 wchar_t buffered[2];
2345 public:
2346 UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2347 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2348 buffered[0] = 0;
2349 buffered[1] = 0;
2351 UTF8Iterator(const UTF8Iterator &other) {
2352 doc = other.doc;
2353 position = other.position;
2354 characterIndex = other.characterIndex;
2355 lenBytes = other.lenBytes;
2356 lenCharacters = other.lenCharacters;
2357 buffered[0] = other.buffered[0];
2358 buffered[1] = other.buffered[1];
2360 UTF8Iterator &operator=(const UTF8Iterator &other) {
2361 if (this != &other) {
2362 doc = other.doc;
2363 position = other.position;
2364 characterIndex = other.characterIndex;
2365 lenBytes = other.lenBytes;
2366 lenCharacters = other.lenCharacters;
2367 buffered[0] = other.buffered[0];
2368 buffered[1] = other.buffered[1];
2370 return *this;
2372 wchar_t operator*() {
2373 if (lenCharacters == 0) {
2374 ReadCharacter();
2376 return buffered[characterIndex];
2378 UTF8Iterator &operator++() {
2379 if ((characterIndex + 1) < (lenCharacters)) {
2380 characterIndex++;
2381 } else {
2382 position += lenBytes;
2383 ReadCharacter();
2384 characterIndex = 0;
2386 return *this;
2388 UTF8Iterator operator++(int) {
2389 UTF8Iterator retVal(*this);
2390 if ((characterIndex + 1) < (lenCharacters)) {
2391 characterIndex++;
2392 } else {
2393 position += lenBytes;
2394 ReadCharacter();
2395 characterIndex = 0;
2397 return retVal;
2399 UTF8Iterator &operator--() {
2400 if (characterIndex) {
2401 characterIndex--;
2402 } else {
2403 position = doc->NextPosition(position, -1);
2404 ReadCharacter();
2405 characterIndex = lenCharacters - 1;
2407 return *this;
2409 bool operator==(const UTF8Iterator &other) const {
2410 // Only test the determining fields, not the character widths and values derived from this
2411 return doc == other.doc &&
2412 position == other.position &&
2413 characterIndex == other.characterIndex;
2415 bool operator!=(const UTF8Iterator &other) const {
2416 // Only test the determining fields, not the character widths and values derived from this
2417 return doc != other.doc ||
2418 position != other.position ||
2419 characterIndex != other.characterIndex;
2421 int Pos() const {
2422 return position;
2424 int PosRoundUp() const {
2425 if (characterIndex)
2426 return position + lenBytes; // Force to end of character
2427 else
2428 return position;
2430 private:
2431 void ReadCharacter() {
2432 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2433 lenBytes = charExtracted.widthBytes;
2434 if (charExtracted.character == unicodeReplacementChar) {
2435 lenCharacters = 1;
2436 buffered[0] = static_cast<wchar_t>(charExtracted.character);
2437 } else {
2438 lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2443 #else
2445 // On Unix, report non-BMP characters as single characters
2447 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2448 const Document *doc;
2449 Position position;
2450 public:
2451 UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2453 UTF8Iterator(const UTF8Iterator &other) {
2454 doc = other.doc;
2455 position = other.position;
2457 UTF8Iterator &operator=(const UTF8Iterator &other) {
2458 if (this != &other) {
2459 doc = other.doc;
2460 position = other.position;
2462 return *this;
2464 wchar_t operator*() const {
2465 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2466 return charExtracted.character;
2468 UTF8Iterator &operator++() {
2469 position = doc->NextPosition(position, 1);
2470 return *this;
2472 UTF8Iterator operator++(int) {
2473 UTF8Iterator retVal(*this);
2474 position = doc->NextPosition(position, 1);
2475 return retVal;
2477 UTF8Iterator &operator--() {
2478 position = doc->NextPosition(position, -1);
2479 return *this;
2481 bool operator==(const UTF8Iterator &other) const {
2482 return doc == other.doc && position == other.position;
2484 bool operator!=(const UTF8Iterator &other) const {
2485 return doc != other.doc || position != other.position;
2487 int Pos() const {
2488 return position;
2490 int PosRoundUp() const {
2491 return position;
2495 #endif
2497 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2498 std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2499 if (!doc->IsLineStartPosition(startPos))
2500 flagsMatch |= std::regex_constants::match_not_bol;
2501 if (!doc->IsLineEndPosition(endPos))
2502 flagsMatch |= std::regex_constants::match_not_eol;
2503 return flagsMatch;
2506 template<typename Iterator, typename Regex>
2507 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2508 bool matched = false;
2509 std::match_results<Iterator> match;
2511 // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2512 // If they didn't then the line by line iteration could be removed for the forwards
2513 // case and replaced with the following 4 lines:
2514 // Iterator uiStart(doc, startPos);
2515 // Iterator uiEnd(doc, endPos);
2516 // flagsMatch = MatchFlags(doc, startPos, endPos);
2517 // matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2519 // Line by line.
2520 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2521 const Range lineRange = resr.LineRange(line);
2522 Iterator itStart(doc, lineRange.start);
2523 Iterator itEnd(doc, lineRange.end);
2524 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2525 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2526 // Check for the last match on this line.
2527 if (matched) {
2528 if (resr.increment == -1) {
2529 while (matched) {
2530 Iterator itNext(doc, match[0].second.PosRoundUp());
2531 flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2532 std::match_results<Iterator> matchNext;
2533 matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2534 if (matched) {
2535 if (match[0].first == match[0].second) {
2536 // Empty match means failure so exit
2537 return false;
2539 match = matchNext;
2542 matched = true;
2544 break;
2547 if (matched) {
2548 for (size_t co = 0; co < match.size(); co++) {
2549 search.bopat[co] = match[co].first.Pos();
2550 search.eopat[co] = match[co].second.PosRoundUp();
2551 size_t lenMatch = search.eopat[co] - search.bopat[co];
2552 search.pat[co].resize(lenMatch);
2553 for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2554 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2558 return matched;
2561 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2562 bool caseSensitive, int *length, RESearch &search) {
2563 const RESearchRange resr(doc, minPos, maxPos);
2564 try {
2565 //ElapsedTime et;
2566 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2567 // Flags that apper to have no effect:
2568 // | std::regex::collate | std::regex::extended;
2569 if (!caseSensitive)
2570 flagsRe = flagsRe | std::regex::icase;
2572 // Clear the RESearch so can fill in matches
2573 search.Clear();
2575 bool matched = false;
2576 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2577 unsigned int lenS = static_cast<unsigned int>(strlen(s));
2578 std::vector<wchar_t> ws(lenS + 1);
2579 #if WCHAR_T_IS_16
2580 size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2581 #else
2582 size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2583 #endif
2584 ws[outLen] = 0;
2585 std::wregex regexp;
2586 #if defined(__APPLE__)
2587 // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2588 // is one byte not one character.
2589 // However, on OS X this makes wregex act as Unicode
2590 std::locale localeU("en_US.UTF-8");
2591 regexp.imbue(localeU);
2592 #endif
2593 regexp.assign(&ws[0], flagsRe);
2594 matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2596 } else {
2597 std::regex regexp;
2598 regexp.assign(s, flagsRe);
2599 matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2602 int posMatch = -1;
2603 if (matched) {
2604 posMatch = search.bopat[0];
2605 *length = search.eopat[0] - search.bopat[0];
2607 // Example - search in doc/ScintillaHistory.html for
2608 // [[:upper:]]eta[[:space:]]
2609 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2610 //double durSearch = et.Duration(true);
2611 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2612 return posMatch;
2613 } catch (std::regex_error &) {
2614 // Failed to create regular expression
2615 throw RegexError();
2616 } catch (...) {
2617 // Failed in some other way
2618 return -1;
2622 #endif
2626 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2627 bool caseSensitive, bool, bool, int flags,
2628 int *length) {
2630 #ifdef CXX11_REGEX
2631 if (flags & SCFIND_CXX11REGEX) {
2632 return Cxx11RegexFindText(doc, minPos, maxPos, s,
2633 caseSensitive, length, search);
2635 #endif
2637 const RESearchRange resr(doc, minPos, maxPos);
2639 const bool posix = (flags & SCFIND_POSIX) != 0;
2641 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2642 if (errmsg) {
2643 return -1;
2645 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2646 // Replace first '.' with '-' in each property file variable reference:
2647 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2648 // Replace: $(\1-\2)
2649 int pos = -1;
2650 int lenRet = 0;
2651 const char searchEnd = s[*length - 1];
2652 const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2653 for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2654 int startOfLine = doc->LineStart(line);
2655 int endOfLine = doc->LineEnd(line);
2656 if (resr.increment == 1) {
2657 if (line == resr.lineRangeStart) {
2658 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2659 continue; // Can't match start of line if start position after start of line
2660 startOfLine = resr.startPos;
2662 if (line == resr.lineRangeEnd) {
2663 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2664 continue; // Can't match end of line if end position before end of line
2665 endOfLine = resr.endPos;
2667 } else {
2668 if (line == resr.lineRangeEnd) {
2669 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2670 continue; // Can't match start of line if end position after start of line
2671 startOfLine = resr.endPos;
2673 if (line == resr.lineRangeStart) {
2674 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2675 continue; // Can't match end of line if start position before end of line
2676 endOfLine = resr.startPos;
2680 DocumentIndexer di(doc, endOfLine);
2681 int success = search.Execute(di, startOfLine, endOfLine);
2682 if (success) {
2683 pos = search.bopat[0];
2684 // Ensure only whole characters selected
2685 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2686 lenRet = search.eopat[0] - search.bopat[0];
2687 // There can be only one start of a line, so no need to look for last match in line
2688 if ((resr.increment == -1) && (s[0] != '^')) {
2689 // Check for the last match on this line.
2690 int repetitions = 1000; // Break out of infinite loop
2691 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2692 success = search.Execute(di, pos+1, endOfLine);
2693 if (success) {
2694 if (search.eopat[0] <= minPos) {
2695 pos = search.bopat[0];
2696 lenRet = search.eopat[0] - search.bopat[0];
2697 } else {
2698 success = 0;
2703 break;
2706 *length = lenRet;
2707 return pos;
2710 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2711 substituted.clear();
2712 DocumentIndexer di(doc, doc->Length());
2713 search.GrabMatches(di);
2714 for (int j = 0; j < *length; j++) {
2715 if (text[j] == '\\') {
2716 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2717 unsigned int patNum = text[j + 1] - '0';
2718 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2719 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2720 substituted.append(search.pat[patNum].c_str(), len);
2721 j++;
2722 } else {
2723 j++;
2724 switch (text[j]) {
2725 case 'a':
2726 substituted.push_back('\a');
2727 break;
2728 case 'b':
2729 substituted.push_back('\b');
2730 break;
2731 case 'f':
2732 substituted.push_back('\f');
2733 break;
2734 case 'n':
2735 substituted.push_back('\n');
2736 break;
2737 case 'r':
2738 substituted.push_back('\r');
2739 break;
2740 case 't':
2741 substituted.push_back('\t');
2742 break;
2743 case 'v':
2744 substituted.push_back('\v');
2745 break;
2746 case '\\':
2747 substituted.push_back('\\');
2748 break;
2749 default:
2750 substituted.push_back('\\');
2751 j--;
2754 } else {
2755 substituted.push_back(text[j]);
2758 *length = static_cast<int>(substituted.length());
2759 return substituted.c_str();
2762 #ifndef SCI_OWNREGEX
2764 #ifdef SCI_NAMESPACE
2766 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2767 return new BuiltinRegex(charClassTable);
2770 #else
2772 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2773 return new BuiltinRegex(charClassTable);
2776 #endif
2778 #endif