Update Scintilla to version 3.4.1
[geany-mirror.git] / scintilla / src / Document.cxx
blob1788cf74ec252e5cc4790638eb0ed8ee7a3acc51
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <assert.h>
14 #include <string>
15 #include <vector>
16 #include <algorithm>
18 #include "Platform.h"
20 #include "ILexer.h"
21 #include "Scintilla.h"
23 #include "SplitVector.h"
24 #include "Partitioning.h"
25 #include "RunStyles.h"
26 #include "CellBuffer.h"
27 #include "PerLine.h"
28 #include "CharClassify.h"
29 #include "CharacterSet.h"
30 #include "Decoration.h"
31 #include "CaseFolder.h"
32 #include "Document.h"
33 #include "RESearch.h"
34 #include "UniConversion.h"
36 #ifdef SCI_NAMESPACE
37 using namespace Scintilla;
38 #endif
40 static inline bool IsPunctuation(char ch) {
41 return IsASCII(ch) && ispunct(ch);
44 void LexInterface::Colourise(int start, int end) {
45 if (pdoc && instance && !performingStyle) {
46 // Protect against reentrance, which may occur, for example, when
47 // fold points are discovered while performing styling and the folding
48 // code looks for child lines which may trigger styling.
49 performingStyle = true;
51 int lengthDoc = pdoc->Length();
52 if (end == -1)
53 end = lengthDoc;
54 int len = end - start;
56 PLATFORM_ASSERT(len >= 0);
57 PLATFORM_ASSERT(start + len <= lengthDoc);
59 int styleStart = 0;
60 if (start > 0)
61 styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
63 if (len > 0) {
64 instance->Lex(start, len, styleStart, pdoc);
65 instance->Fold(start, len, styleStart, pdoc);
68 performingStyle = false;
72 int LexInterface::LineEndTypesSupported() {
73 if (instance) {
74 int interfaceVersion = instance->Version();
75 if (interfaceVersion >= lvSubStyles) {
76 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
77 return ssinstance->LineEndTypesSupported();
80 return 0;
83 Document::Document() {
84 refCount = 0;
85 pcf = NULL;
86 #ifdef _WIN32
87 eolMode = SC_EOL_CRLF;
88 #else
89 eolMode = SC_EOL_LF;
90 #endif
91 dbcsCodePage = 0;
92 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
93 stylingBits = 5;
94 stylingBitsMask = 0x1F;
95 stylingMask = 0;
96 endStyled = 0;
97 styleClock = 0;
98 enteredModification = 0;
99 enteredStyling = 0;
100 enteredReadOnlyCount = 0;
101 tabInChars = 8;
102 indentInChars = 0;
103 actualIndentInChars = 8;
104 useTabs = true;
105 tabIndents = true;
106 backspaceUnindents = false;
108 matchesValid = false;
109 regex = 0;
111 UTF8BytesOfLeadInitialise();
113 perLineData[ldMarkers] = new LineMarkers();
114 perLineData[ldLevels] = new LineLevels();
115 perLineData[ldState] = new LineState();
116 perLineData[ldMargin] = new LineAnnotation();
117 perLineData[ldAnnotation] = new LineAnnotation();
119 cb.SetPerLine(this);
121 pli = 0;
124 Document::~Document() {
125 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
126 it->watcher->NotifyDeleted(this, it->userData);
128 for (int j=0; j<ldSize; j++) {
129 delete perLineData[j];
130 perLineData[j] = 0;
132 delete regex;
133 regex = 0;
134 delete pli;
135 pli = 0;
136 delete pcf;
137 pcf = 0;
140 void Document::Init() {
141 for (int j=0; j<ldSize; j++) {
142 if (perLineData[j])
143 perLineData[j]->Init();
147 int Document::LineEndTypesSupported() const {
148 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
149 return pli->LineEndTypesSupported();
150 else
151 return 0;
154 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
155 if (dbcsCodePage != dbcsCodePage_) {
156 dbcsCodePage = dbcsCodePage_;
157 SetCaseFolder(NULL);
158 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
159 return true;
160 } else {
161 return false;
165 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
166 if (lineEndBitSet != lineEndBitSet_) {
167 lineEndBitSet = lineEndBitSet_;
168 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
169 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
170 ModifiedAt(0);
171 cb.SetLineEndTypes(lineEndBitSetActive);
172 return true;
173 } else {
174 return false;
176 } else {
177 return false;
181 void Document::InsertLine(int line) {
182 for (int j=0; j<ldSize; j++) {
183 if (perLineData[j])
184 perLineData[j]->InsertLine(line);
188 void Document::RemoveLine(int line) {
189 for (int j=0; j<ldSize; j++) {
190 if (perLineData[j])
191 perLineData[j]->RemoveLine(line);
195 // Increase reference count and return its previous value.
196 int Document::AddRef() {
197 return refCount++;
200 // Decrease reference count and return its previous value.
201 // Delete the document if reference count reaches zero.
202 int SCI_METHOD Document::Release() {
203 int curRefCount = --refCount;
204 if (curRefCount == 0)
205 delete this;
206 return curRefCount;
209 void Document::SetSavePoint() {
210 cb.SetSavePoint();
211 NotifySavePoint(true);
214 int Document::GetMark(int line) {
215 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
218 int Document::MarkerNext(int lineStart, int mask) const {
219 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
222 int Document::AddMark(int line, int markerNum) {
223 if (line >= 0 && line <= LinesTotal()) {
224 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
225 AddMark(line, markerNum, LinesTotal());
226 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
227 NotifyModified(mh);
228 return prev;
229 } else {
230 return 0;
234 void Document::AddMarkSet(int line, int valueSet) {
235 if (line < 0 || line > LinesTotal()) {
236 return;
238 unsigned int m = valueSet;
239 for (int i = 0; m; i++, m >>= 1)
240 if (m & 1)
241 static_cast<LineMarkers *>(perLineData[ldMarkers])->
242 AddMark(line, i, LinesTotal());
243 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
244 NotifyModified(mh);
247 void Document::DeleteMark(int line, int markerNum) {
248 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
249 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
250 NotifyModified(mh);
253 void Document::DeleteMarkFromHandle(int markerHandle) {
254 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
255 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
256 mh.line = -1;
257 NotifyModified(mh);
260 void Document::DeleteAllMarks(int markerNum) {
261 bool someChanges = false;
262 for (int line = 0; line < LinesTotal(); line++) {
263 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
264 someChanges = true;
266 if (someChanges) {
267 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
268 mh.line = -1;
269 NotifyModified(mh);
273 int Document::LineFromHandle(int markerHandle) {
274 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
277 int SCI_METHOD Document::LineStart(int line) const {
278 return cb.LineStart(line);
281 int SCI_METHOD Document::LineEnd(int line) const {
282 if (line >= LinesTotal() - 1) {
283 return LineStart(line + 1);
284 } else {
285 int position = LineStart(line + 1);
286 if (SC_CP_UTF8 == dbcsCodePage) {
287 unsigned char bytes[] = {
288 static_cast<unsigned char>(cb.CharAt(position-3)),
289 static_cast<unsigned char>(cb.CharAt(position-2)),
290 static_cast<unsigned char>(cb.CharAt(position-1)),
292 if (UTF8IsSeparator(bytes)) {
293 return position - UTF8SeparatorLength;
295 if (UTF8IsNEL(bytes+1)) {
296 return position - UTF8NELLength;
299 position--; // Back over CR or LF
300 // When line terminator is CR+LF, may need to go back one more
301 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
302 position--;
304 return position;
308 void SCI_METHOD Document::SetErrorStatus(int status) {
309 // Tell the watchers an error has occurred.
310 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
311 it->watcher->NotifyErrorOccurred(this, it->userData, status);
315 int SCI_METHOD Document::LineFromPosition(int pos) const {
316 return cb.LineFromPosition(pos);
319 int Document::LineEndPosition(int position) const {
320 return LineEnd(LineFromPosition(position));
323 bool Document::IsLineEndPosition(int position) const {
324 return LineEnd(LineFromPosition(position)) == position;
327 bool Document::IsPositionInLineEnd(int position) const {
328 return position >= LineEnd(LineFromPosition(position));
331 int Document::VCHomePosition(int position) const {
332 int line = LineFromPosition(position);
333 int startPosition = LineStart(line);
334 int endLine = LineEnd(line);
335 int startText = startPosition;
336 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
337 startText++;
338 if (position == startText)
339 return startPosition;
340 else
341 return startText;
344 int SCI_METHOD Document::SetLevel(int line, int level) {
345 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
346 if (prev != level) {
347 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
348 LineStart(line), 0, 0, 0, line);
349 mh.foldLevelNow = level;
350 mh.foldLevelPrev = prev;
351 NotifyModified(mh);
353 return prev;
356 int SCI_METHOD Document::GetLevel(int line) const {
357 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
360 void Document::ClearLevels() {
361 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
364 static bool IsSubordinate(int levelStart, int levelTry) {
365 if (levelTry & SC_FOLDLEVELWHITEFLAG)
366 return true;
367 else
368 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
371 int Document::GetLastChild(int lineParent, int level, int lastLine) {
372 if (level == -1)
373 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
374 int maxLine = LinesTotal();
375 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
376 int lineMaxSubord = lineParent;
377 while (lineMaxSubord < maxLine - 1) {
378 EnsureStyledTo(LineStart(lineMaxSubord + 2));
379 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
380 break;
381 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
382 break;
383 lineMaxSubord++;
385 if (lineMaxSubord > lineParent) {
386 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
387 // Have chewed up some whitespace that belongs to a parent so seek back
388 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
389 lineMaxSubord--;
393 return lineMaxSubord;
396 int Document::GetFoldParent(int line) const {
397 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
398 int lineLook = line - 1;
399 while ((lineLook > 0) && (
400 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
401 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
403 lineLook--;
405 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
406 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
407 return lineLook;
408 } else {
409 return -1;
413 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
414 int level = GetLevel(line);
415 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
417 int lookLine = line;
418 int lookLineLevel = level;
419 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
420 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
421 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
422 lookLineLevel = GetLevel(--lookLine);
423 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
426 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
427 if (beginFoldBlock == -1) {
428 highlightDelimiter.Clear();
429 return;
432 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
433 int firstChangeableLineBefore = -1;
434 if (endFoldBlock < line) {
435 lookLine = beginFoldBlock - 1;
436 lookLineLevel = GetLevel(lookLine);
437 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
438 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
439 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
440 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
441 beginFoldBlock = lookLine;
442 endFoldBlock = line;
443 firstChangeableLineBefore = line - 1;
446 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
447 break;
448 lookLineLevel = GetLevel(--lookLine);
449 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
452 if (firstChangeableLineBefore == -1) {
453 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
454 lookLine >= beginFoldBlock;
455 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
456 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
457 firstChangeableLineBefore = lookLine;
458 break;
462 if (firstChangeableLineBefore == -1)
463 firstChangeableLineBefore = beginFoldBlock - 1;
465 int firstChangeableLineAfter = -1;
466 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
467 lookLine <= endFoldBlock;
468 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
469 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
470 firstChangeableLineAfter = lookLine;
471 break;
474 if (firstChangeableLineAfter == -1)
475 firstChangeableLineAfter = endFoldBlock + 1;
477 highlightDelimiter.beginFoldBlock = beginFoldBlock;
478 highlightDelimiter.endFoldBlock = endFoldBlock;
479 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
480 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
483 int Document::ClampPositionIntoDocument(int pos) const {
484 return Platform::Clamp(pos, 0, Length());
487 bool Document::IsCrLf(int pos) const {
488 if (pos < 0)
489 return false;
490 if (pos >= (Length() - 1))
491 return false;
492 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
495 int Document::LenChar(int pos) {
496 if (pos < 0) {
497 return 1;
498 } else if (IsCrLf(pos)) {
499 return 2;
500 } else if (SC_CP_UTF8 == dbcsCodePage) {
501 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
502 const int widthCharBytes = UTF8BytesOfLead[leadByte];
503 int lengthDoc = Length();
504 if ((pos + widthCharBytes) > lengthDoc)
505 return lengthDoc - pos;
506 else
507 return widthCharBytes;
508 } else if (dbcsCodePage) {
509 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
510 } else {
511 return 1;
515 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
516 int trail = pos;
517 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
518 trail--;
519 start = (trail > 0) ? trail-1 : trail;
521 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
522 const int widthCharBytes = UTF8BytesOfLead[leadByte];
523 if (widthCharBytes == 1) {
524 return false;
525 } else {
526 int trailBytes = widthCharBytes - 1;
527 int len = pos - start;
528 if (len > trailBytes)
529 // pos too far from lead
530 return false;
531 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
532 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
533 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
534 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
535 if (utf8status & UTF8MaskInvalid)
536 return false;
537 end = start + widthCharBytes;
538 return true;
542 // Normalise a position so that it is not halfway through a two byte character.
543 // This can occur in two situations -
544 // When lines are terminated with \r\n pairs which should be treated as one character.
545 // When displaying DBCS text such as Japanese.
546 // If moving, move the position in the indicated direction.
547 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
548 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
549 // If out of range, just return minimum/maximum value.
550 if (pos <= 0)
551 return 0;
552 if (pos >= Length())
553 return Length();
555 // PLATFORM_ASSERT(pos > 0 && pos < Length());
556 if (checkLineEnd && IsCrLf(pos - 1)) {
557 if (moveDir > 0)
558 return pos + 1;
559 else
560 return pos - 1;
563 if (dbcsCodePage) {
564 if (SC_CP_UTF8 == dbcsCodePage) {
565 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
566 // If ch is not a trail byte then pos is valid intercharacter position
567 if (UTF8IsTrailByte(ch)) {
568 int startUTF = pos;
569 int endUTF = pos;
570 if (InGoodUTF8(pos, startUTF, endUTF)) {
571 // ch is a trail byte within a UTF-8 character
572 if (moveDir > 0)
573 pos = endUTF;
574 else
575 pos = startUTF;
577 // Else invalid UTF-8 so return position of isolated trail byte
579 } else {
580 // Anchor DBCS calculations at start of line because start of line can
581 // not be a DBCS trail byte.
582 int posStartLine = LineStart(LineFromPosition(pos));
583 if (pos == posStartLine)
584 return pos;
586 // Step back until a non-lead-byte is found.
587 int posCheck = pos;
588 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
589 posCheck--;
591 // Check from known start of character.
592 while (posCheck < pos) {
593 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
594 if (posCheck + mbsize == pos) {
595 return pos;
596 } else if (posCheck + mbsize > pos) {
597 if (moveDir > 0) {
598 return posCheck + mbsize;
599 } else {
600 return posCheck;
603 posCheck += mbsize;
608 return pos;
611 // NextPosition moves between valid positions - it can not handle a position in the middle of a
612 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
613 // A \r\n pair is treated as two characters.
614 int Document::NextPosition(int pos, int moveDir) const {
615 // If out of range, just return minimum/maximum value.
616 int increment = (moveDir > 0) ? 1 : -1;
617 if (pos + increment <= 0)
618 return 0;
619 if (pos + increment >= Length())
620 return Length();
622 if (dbcsCodePage) {
623 if (SC_CP_UTF8 == dbcsCodePage) {
624 if (increment == 1) {
625 // Simple forward movement case so can avoid some checks
626 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
627 if (UTF8IsAscii(leadByte)) {
628 // Single byte character or invalid
629 pos++;
630 } else {
631 const int widthCharBytes = UTF8BytesOfLead[leadByte];
632 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
633 for (int b=1; b<widthCharBytes; b++)
634 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
635 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
636 if (utf8status & UTF8MaskInvalid)
637 pos++;
638 else
639 pos += utf8status & UTF8MaskWidth;
641 } else {
642 // Examine byte before position
643 pos--;
644 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
645 // If ch is not a trail byte then pos is valid intercharacter position
646 if (UTF8IsTrailByte(ch)) {
647 // If ch is a trail byte in a valid UTF-8 character then return start of character
648 int startUTF = pos;
649 int endUTF = pos;
650 if (InGoodUTF8(pos, startUTF, endUTF)) {
651 pos = startUTF;
653 // Else invalid UTF-8 so return position of isolated trail byte
656 } else {
657 if (moveDir > 0) {
658 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
659 pos += mbsize;
660 if (pos > Length())
661 pos = Length();
662 } else {
663 // Anchor DBCS calculations at start of line because start of line can
664 // not be a DBCS trail byte.
665 int posStartLine = LineStart(LineFromPosition(pos));
666 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
667 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
668 if ((pos - 1) <= posStartLine) {
669 return pos - 1;
670 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
671 // Must actually be trail byte
672 return pos - 2;
673 } else {
674 // Otherwise, step back until a non-lead-byte is found.
675 int posTemp = pos - 1;
676 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
678 // Now posTemp+1 must point to the beginning of a character,
679 // so figure out whether we went back an even or an odd
680 // number of bytes and go back 1 or 2 bytes, respectively.
681 return (pos - 1 - ((pos - posTemp) & 1));
685 } else {
686 pos += increment;
689 return pos;
692 bool Document::NextCharacter(int &pos, int moveDir) const {
693 // Returns true if pos changed
694 int posNext = NextPosition(pos, moveDir);
695 if (posNext == pos) {
696 return false;
697 } else {
698 pos = posNext;
699 return true;
// Decode the UTF-8 sequence starting at us into a code point, choosing the
// sequence length from the lead byte. No validation is performed here.
// Lead bytes below 0xC2 or from 0xF5 upwards are returned unchanged.
static inline int UnicodeFromBytes(const unsigned char *us) {
	const unsigned char lead = us[0];
	if ((lead < 0xC2) || (lead >= 0xF5))
		return lead;

	if (lead < 0xE0) {
		// Two bytes: 5 bits from the lead, 6 from the trail.
		return ((lead & 0x1F) << 6) + (us[1] & 0x3F);
	}
	if (lead < 0xF0) {
		// Three bytes: 4 bits from the lead, 6 from each trail.
		return ((lead & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
	}
	// Four bytes: 3 bits from the lead, 6 from each trail.
	return ((lead & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
}
716 // Return -1 on out-of-bounds
717 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
718 int pos = positionStart;
719 if (dbcsCodePage) {
720 const int increment = (characterOffset > 0) ? 1 : -1;
721 while (characterOffset != 0) {
722 const int posNext = NextPosition(pos, increment);
723 if (posNext == pos)
724 return INVALID_POSITION;
725 pos = posNext;
726 characterOffset -= increment;
728 } else {
729 pos = positionStart + characterOffset;
730 if ((pos < 0) || (pos > Length()))
731 return INVALID_POSITION;
733 return pos;
736 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
737 int character;
738 int bytesInCharacter = 1;
739 if (dbcsCodePage) {
740 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
741 if (SC_CP_UTF8 == dbcsCodePage) {
742 if (UTF8IsAscii(leadByte)) {
743 // Single byte character or invalid
744 character = leadByte;
745 } else {
746 const int widthCharBytes = UTF8BytesOfLead[leadByte];
747 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
748 for (int b=1; b<widthCharBytes; b++)
749 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
750 int utf8status = UTF8Classify(charBytes, widthCharBytes);
751 if (utf8status & UTF8MaskInvalid) {
752 // Report as singleton surrogate values which are invalid Unicode
753 character = 0xDC80 + leadByte;
754 } else {
755 bytesInCharacter = utf8status & UTF8MaskWidth;
756 character = UnicodeFromBytes(charBytes);
759 } else {
760 if (IsDBCSLeadByte(leadByte)) {
761 bytesInCharacter = 2;
762 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
763 } else {
764 character = leadByte;
767 } else {
768 character = cb.CharAt(position);
770 if (pWidth) {
771 *pWidth = bytesInCharacter;
773 return character;
776 int SCI_METHOD Document::CodePage() const {
777 return dbcsCodePage;
780 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
781 // Byte ranges found in Wikipedia articles with relevant search strings in each case
782 unsigned char uch = static_cast<unsigned char>(ch);
783 switch (dbcsCodePage) {
784 case 932:
785 // Shift_jis
786 return ((uch >= 0x81) && (uch <= 0x9F)) ||
787 ((uch >= 0xE0) && (uch <= 0xFC));
788 // Lead bytes F0 to FC may be a Microsoft addition.
789 case 936:
790 // GBK
791 return (uch >= 0x81) && (uch <= 0xFE);
792 case 949:
793 // Korean Wansung KS C-5601-1987
794 return (uch >= 0x81) && (uch <= 0xFE);
795 case 950:
796 // Big5
797 return (uch >= 0x81) && (uch <= 0xFE);
798 case 1361:
799 // Korean Johab KS C-5601-1992
800 return
801 ((uch >= 0x84) && (uch <= 0xD3)) ||
802 ((uch >= 0xD8) && (uch <= 0xDE)) ||
803 ((uch >= 0xE0) && (uch <= 0xF9));
805 return false;
// True for a space or a horizontal tab only.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
812 // Need to break text into segments near lengthSegment but taking into
813 // account the encoding to not break inside a UTF-8 or DBCS character
814 // and also trying to avoid breaking inside a pair of combining characters.
815 // The segment length must always be long enough (more than 4 bytes)
816 // so that there will be at least one whole character to make a segment.
817 // For UTF-8, text must consist only of valid whole characters.
818 // In preference order from best to worst:
819 // 1) Break after space
820 // 2) Break before punctuation
821 // 3) Break after whole character
823 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
824 if (length <= lengthSegment)
825 return length;
826 int lastSpaceBreak = -1;
827 int lastPunctuationBreak = -1;
828 int lastEncodingAllowedBreak = 0;
829 for (int j=0; j < lengthSegment;) {
830 unsigned char ch = static_cast<unsigned char>(text[j]);
831 if (j > 0) {
832 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
833 lastSpaceBreak = j;
835 if (ch < 'A') {
836 lastPunctuationBreak = j;
839 lastEncodingAllowedBreak = j;
841 if (dbcsCodePage == SC_CP_UTF8) {
842 j += UTF8BytesOfLead[ch];
843 } else if (dbcsCodePage) {
844 j += IsDBCSLeadByte(ch) ? 2 : 1;
845 } else {
846 j++;
849 if (lastSpaceBreak >= 0) {
850 return lastSpaceBreak;
851 } else if (lastPunctuationBreak >= 0) {
852 return lastPunctuationBreak;
854 return lastEncodingAllowedBreak;
857 EncodingFamily Document::CodePageFamily() const {
858 if (SC_CP_UTF8 == dbcsCodePage)
859 return efUnicode;
860 else if (dbcsCodePage)
861 return efDBCS;
862 else
863 return efEightBit;
866 void Document::ModifiedAt(int pos) {
867 if (endStyled > pos)
868 endStyled = pos;
871 void Document::CheckReadOnly() {
872 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
873 enteredReadOnlyCount++;
874 NotifyModifyAttempt();
875 enteredReadOnlyCount--;
879 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
880 // SetStyleAt does not change the persistent state of a document
882 bool Document::DeleteChars(int pos, int len) {
883 if (len <= 0)
884 return false;
885 if ((pos + len) > Length())
886 return false;
887 CheckReadOnly();
888 if (enteredModification != 0) {
889 return false;
890 } else {
891 enteredModification++;
892 if (!cb.IsReadOnly()) {
893 NotifyModified(
894 DocModification(
895 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
896 pos, len,
897 0, 0));
898 int prevLinesTotal = LinesTotal();
899 bool startSavePoint = cb.IsSavePoint();
900 bool startSequence = false;
901 const char *text = cb.DeleteChars(pos, len, startSequence);
902 if (startSavePoint && cb.IsCollectingUndo())
903 NotifySavePoint(!startSavePoint);
904 if ((pos < Length()) || (pos == 0))
905 ModifiedAt(pos);
906 else
907 ModifiedAt(pos-1);
908 NotifyModified(
909 DocModification(
910 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
911 pos, len,
912 LinesTotal() - prevLinesTotal, text));
914 enteredModification--;
916 return !cb.IsReadOnly();
920 * Insert a string with a length.
922 bool Document::InsertString(int position, const char *s, int insertLength) {
923 if (insertLength <= 0) {
924 return false;
926 CheckReadOnly();
927 if (enteredModification != 0) {
928 return false;
929 } else {
930 enteredModification++;
931 if (!cb.IsReadOnly()) {
932 NotifyModified(
933 DocModification(
934 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
935 position, insertLength,
936 0, s));
937 int prevLinesTotal = LinesTotal();
938 bool startSavePoint = cb.IsSavePoint();
939 bool startSequence = false;
940 const char *text = cb.InsertString(position, s, insertLength, startSequence);
941 if (startSavePoint && cb.IsCollectingUndo())
942 NotifySavePoint(!startSavePoint);
943 ModifiedAt(position);
944 NotifyModified(
945 DocModification(
946 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
947 position, insertLength,
948 LinesTotal() - prevLinesTotal, text));
950 enteredModification--;
952 return !cb.IsReadOnly();
955 int SCI_METHOD Document::AddData(char *data, int length) {
956 try {
957 int position = Length();
958 InsertString(position, data, length);
959 } catch (std::bad_alloc &) {
960 return SC_STATUS_BADALLOC;
961 } catch (...) {
962 return SC_STATUS_FAILURE;
964 return 0;
967 void * SCI_METHOD Document::ConvertToDocument() {
968 return this;
971 int Document::Undo() {
972 int newPos = -1;
973 CheckReadOnly();
974 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
975 enteredModification++;
976 if (!cb.IsReadOnly()) {
977 bool startSavePoint = cb.IsSavePoint();
978 bool multiLine = false;
979 int steps = cb.StartUndo();
980 //Platform::DebugPrintf("Steps=%d\n", steps);
981 int coalescedRemovePos = -1;
982 int coalescedRemoveLen = 0;
983 int prevRemoveActionPos = -1;
984 int prevRemoveActionLen = 0;
985 for (int step = 0; step < steps; step++) {
986 const int prevLinesTotal = LinesTotal();
987 const Action &action = cb.GetUndoStep();
988 if (action.at == removeAction) {
989 NotifyModified(DocModification(
990 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
991 } else if (action.at == containerAction) {
992 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
993 dm.token = action.position;
994 NotifyModified(dm);
995 if (!action.mayCoalesce) {
996 coalescedRemovePos = -1;
997 coalescedRemoveLen = 0;
998 prevRemoveActionPos = -1;
999 prevRemoveActionLen = 0;
1001 } else {
1002 NotifyModified(DocModification(
1003 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1005 cb.PerformUndoStep();
1006 if (action.at != containerAction) {
1007 ModifiedAt(action.position);
1008 newPos = action.position;
1011 int modFlags = SC_PERFORMED_UNDO;
1012 // With undo, an insertion action becomes a deletion notification
1013 if (action.at == removeAction) {
1014 newPos += action.lenData;
1015 modFlags |= SC_MOD_INSERTTEXT;
1016 if ((coalescedRemoveLen > 0) &&
1017 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1018 coalescedRemoveLen += action.lenData;
1019 newPos = coalescedRemovePos + coalescedRemoveLen;
1020 } else {
1021 coalescedRemovePos = action.position;
1022 coalescedRemoveLen = action.lenData;
1024 prevRemoveActionPos = action.position;
1025 prevRemoveActionLen = action.lenData;
1026 } else if (action.at == insertAction) {
1027 modFlags |= SC_MOD_DELETETEXT;
1028 coalescedRemovePos = -1;
1029 coalescedRemoveLen = 0;
1030 prevRemoveActionPos = -1;
1031 prevRemoveActionLen = 0;
1033 if (steps > 1)
1034 modFlags |= SC_MULTISTEPUNDOREDO;
1035 const int linesAdded = LinesTotal() - prevLinesTotal;
1036 if (linesAdded != 0)
1037 multiLine = true;
1038 if (step == steps - 1) {
1039 modFlags |= SC_LASTSTEPINUNDOREDO;
1040 if (multiLine)
1041 modFlags |= SC_MULTILINEUNDOREDO;
1043 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1044 linesAdded, action.data));
1047 bool endSavePoint = cb.IsSavePoint();
1048 if (startSavePoint != endSavePoint)
1049 NotifySavePoint(endSavePoint);
1051 enteredModification--;
1053 return newPos;
1056 int Document::Redo() {
1057 int newPos = -1;
1058 CheckReadOnly();
1059 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1060 enteredModification++;
1061 if (!cb.IsReadOnly()) {
1062 bool startSavePoint = cb.IsSavePoint();
1063 bool multiLine = false;
1064 int steps = cb.StartRedo();
1065 for (int step = 0; step < steps; step++) {
1066 const int prevLinesTotal = LinesTotal();
1067 const Action &action = cb.GetRedoStep();
1068 if (action.at == insertAction) {
1069 NotifyModified(DocModification(
1070 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1071 } else if (action.at == containerAction) {
1072 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1073 dm.token = action.position;
1074 NotifyModified(dm);
1075 } else {
1076 NotifyModified(DocModification(
1077 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1079 cb.PerformRedoStep();
1080 if (action.at != containerAction) {
1081 ModifiedAt(action.position);
1082 newPos = action.position;
1085 int modFlags = SC_PERFORMED_REDO;
1086 if (action.at == insertAction) {
1087 newPos += action.lenData;
1088 modFlags |= SC_MOD_INSERTTEXT;
1089 } else if (action.at == removeAction) {
1090 modFlags |= SC_MOD_DELETETEXT;
1092 if (steps > 1)
1093 modFlags |= SC_MULTISTEPUNDOREDO;
1094 const int linesAdded = LinesTotal() - prevLinesTotal;
1095 if (linesAdded != 0)
1096 multiLine = true;
1097 if (step == steps - 1) {
1098 modFlags |= SC_LASTSTEPINUNDOREDO;
1099 if (multiLine)
1100 modFlags |= SC_MULTILINEUNDOREDO;
1102 NotifyModified(
1103 DocModification(modFlags, action.position, action.lenData,
1104 linesAdded, action.data));
1107 bool endSavePoint = cb.IsSavePoint();
1108 if (startSavePoint != endSavePoint)
1109 NotifySavePoint(endSavePoint);
1111 enteredModification--;
1113 return newPos;
1117 * Insert a single character.
1119 bool Document::InsertChar(int pos, char ch) {
1120 char chs[1];
1121 chs[0] = ch;
1122 return InsertString(pos, chs, 1);
1126 * Insert a null terminated string.
1128 bool Document::InsertCString(int position, const char *s) {
1129 return InsertString(position, s, static_cast<int>(s ? strlen(s) : 0));
1132 void Document::DelChar(int pos) {
1133 DeleteChars(pos, LenChar(pos));
1136 void Document::DelCharBack(int pos) {
1137 if (pos <= 0) {
1138 return;
1139 } else if (IsCrLf(pos - 2)) {
1140 DeleteChars(pos - 2, 2);
1141 } else if (dbcsCodePage) {
1142 int startChar = NextPosition(pos, -1);
1143 DeleteChars(startChar, pos - startChar);
1144 } else {
1145 DeleteChars(pos - 1, 1);
/// Column of the first tab stop strictly after pos, for tab stops every tabSize columns.
static int NextTab(int pos, int tabSize) {
	const int stopsPassed = pos / tabSize;
	return (stopsPassed + 1) * tabSize;
}
/**
 * Build the white space needed to indent by a number of columns.
 * @param indent       width wanted in columns; values <= 0 give an empty string.
 * @param tabSize      columns per tab. A value <= 0 cannot be satisfied with tabs
 *                     (the previous loop never terminated), so only spaces are used.
 * @param insertSpaces when true only spaces are produced, otherwise as many tabs
 *                     as fit followed by spaces for the remainder.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<std::string::size_type>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<std::string::size_type>(indent), ' ');
	return indentation;
}
1168 int SCI_METHOD Document::GetLineIndentation(int line) {
1169 int indent = 0;
1170 if ((line >= 0) && (line < LinesTotal())) {
1171 int lineStart = LineStart(line);
1172 int length = Length();
1173 for (int i = lineStart; i < length; i++) {
1174 char ch = cb.CharAt(i);
1175 if (ch == ' ')
1176 indent++;
1177 else if (ch == '\t')
1178 indent = NextTab(indent, tabInChars);
1179 else
1180 return indent;
1183 return indent;
1186 void Document::SetLineIndentation(int line, int indent) {
1187 int indentOfLine = GetLineIndentation(line);
1188 if (indent < 0)
1189 indent = 0;
1190 if (indent != indentOfLine) {
1191 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1192 int thisLineStart = LineStart(line);
1193 int indentPos = GetLineIndentPosition(line);
1194 UndoGroup ug(this);
1195 DeleteChars(thisLineStart, indentPos - thisLineStart);
1196 InsertCString(thisLineStart, linebuf.c_str());
1200 int Document::GetLineIndentPosition(int line) const {
1201 if (line < 0)
1202 return 0;
1203 int pos = LineStart(line);
1204 int length = Length();
1205 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1206 pos++;
1208 return pos;
1211 int Document::GetColumn(int pos) {
1212 int column = 0;
1213 int line = LineFromPosition(pos);
1214 if ((line >= 0) && (line < LinesTotal())) {
1215 for (int i = LineStart(line); i < pos;) {
1216 char ch = cb.CharAt(i);
1217 if (ch == '\t') {
1218 column = NextTab(column, tabInChars);
1219 i++;
1220 } else if (ch == '\r') {
1221 return column;
1222 } else if (ch == '\n') {
1223 return column;
1224 } else if (i >= Length()) {
1225 return column;
1226 } else {
1227 column++;
1228 i = NextPosition(i, 1);
1232 return column;
1235 int Document::CountCharacters(int startPos, int endPos) {
1236 startPos = MovePositionOutsideChar(startPos, 1, false);
1237 endPos = MovePositionOutsideChar(endPos, -1, false);
1238 int count = 0;
1239 int i = startPos;
1240 while (i < endPos) {
1241 count++;
1242 if (IsCrLf(i))
1243 i++;
1244 i = NextPosition(i, 1);
1246 return count;
1249 int Document::FindColumn(int line, int column) {
1250 int position = LineStart(line);
1251 if ((line >= 0) && (line < LinesTotal())) {
1252 int columnCurrent = 0;
1253 while ((columnCurrent < column) && (position < Length())) {
1254 char ch = cb.CharAt(position);
1255 if (ch == '\t') {
1256 columnCurrent = NextTab(columnCurrent, tabInChars);
1257 if (columnCurrent > column)
1258 return position;
1259 position++;
1260 } else if (ch == '\r') {
1261 return position;
1262 } else if (ch == '\n') {
1263 return position;
1264 } else {
1265 columnCurrent++;
1266 position = NextPosition(position, 1);
1270 return position;
1273 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1274 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1275 for (int line = lineBottom; line >= lineTop; line--) {
1276 int indentOfLine = GetLineIndentation(line);
1277 if (forwards) {
1278 if (LineStart(line) < LineEnd(line)) {
1279 SetLineIndentation(line, indentOfLine + IndentSize());
1281 } else {
1282 SetLineIndentation(line, indentOfLine - IndentSize());
1287 // Convert line endings for a piece of text to a particular mode.
1288 // Stop at len or when a NUL is found.
1289 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1290 std::string dest;
1291 for (size_t i = 0; (i < len) && (s[i]); i++) {
1292 if (s[i] == '\n' || s[i] == '\r') {
1293 if (eolModeWanted == SC_EOL_CR) {
1294 dest.push_back('\r');
1295 } else if (eolModeWanted == SC_EOL_LF) {
1296 dest.push_back('\n');
1297 } else { // eolModeWanted == SC_EOL_CRLF
1298 dest.push_back('\r');
1299 dest.push_back('\n');
1301 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1302 i++;
1304 } else {
1305 dest.push_back(s[i]);
1308 return dest;
1311 void Document::ConvertLineEnds(int eolModeSet) {
1312 UndoGroup ug(this);
1314 for (int pos = 0; pos < Length(); pos++) {
1315 if (cb.CharAt(pos) == '\r') {
1316 if (cb.CharAt(pos + 1) == '\n') {
1317 // CRLF
1318 if (eolModeSet == SC_EOL_CR) {
1319 DeleteChars(pos + 1, 1); // Delete the LF
1320 } else if (eolModeSet == SC_EOL_LF) {
1321 DeleteChars(pos, 1); // Delete the CR
1322 } else {
1323 pos++;
1325 } else {
1326 // CR
1327 if (eolModeSet == SC_EOL_CRLF) {
1328 InsertString(pos + 1, "\n", 1); // Insert LF
1329 pos++;
1330 } else if (eolModeSet == SC_EOL_LF) {
1331 InsertString(pos, "\n", 1); // Insert LF
1332 DeleteChars(pos + 1, 1); // Delete CR
1335 } else if (cb.CharAt(pos) == '\n') {
1336 // LF
1337 if (eolModeSet == SC_EOL_CRLF) {
1338 InsertString(pos, "\r", 1); // Insert CR
1339 pos++;
1340 } else if (eolModeSet == SC_EOL_CR) {
1341 InsertString(pos, "\r", 1); // Insert CR
1342 DeleteChars(pos + 1, 1); // Delete LF
1349 bool Document::IsWhiteLine(int line) const {
1350 int currentChar = LineStart(line);
1351 int endLine = LineEnd(line);
1352 while (currentChar < endLine) {
1353 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1354 return false;
1356 ++currentChar;
1358 return true;
1361 int Document::ParaUp(int pos) const {
1362 int line = LineFromPosition(pos);
1363 line--;
1364 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1365 line--;
1367 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1368 line--;
1370 line++;
1371 return LineStart(line);
1374 int Document::ParaDown(int pos) const {
1375 int line = LineFromPosition(pos);
1376 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1377 line++;
1379 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1380 line++;
1382 if (line < LinesTotal())
1383 return LineStart(line);
1384 else // end of a document
1385 return LineEnd(line-1);
1388 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1389 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1390 return CharClassify::ccWord;
1391 return charClass.GetClass(ch);
1395 * Used by commmands that want to select whole words.
1396 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1398 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1399 CharClassify::cc ccStart = CharClassify::ccWord;
1400 if (delta < 0) {
1401 if (!onlyWordCharacters)
1402 ccStart = WordCharClass(cb.CharAt(pos-1));
1403 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1404 pos--;
1405 } else {
1406 if (!onlyWordCharacters && pos < Length())
1407 ccStart = WordCharClass(cb.CharAt(pos));
1408 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1409 pos++;
1411 return MovePositionOutsideChar(pos, delta, true);
1415 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1416 * (delta < 0).
1417 * This is looking for a transition between character classes although there is also some
1418 * additional movement to transit white space.
1419 * Used by cursor movement by word commands.
1421 int Document::NextWordStart(int pos, int delta) {
1422 if (delta < 0) {
1423 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1424 pos--;
1425 if (pos > 0) {
1426 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1427 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1428 pos--;
1431 } else {
1432 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1433 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1434 pos++;
1435 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1436 pos++;
1438 return pos;
1442 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1443 * (delta < 0).
1444 * This is looking for a transition between character classes although there is also some
1445 * additional movement to transit white space.
1446 * Used by cursor movement by word commands.
1448 int Document::NextWordEnd(int pos, int delta) {
1449 if (delta < 0) {
1450 if (pos > 0) {
1451 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1452 if (ccStart != CharClassify::ccSpace) {
1453 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1454 pos--;
1457 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1458 pos--;
1461 } else {
1462 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1463 pos++;
1465 if (pos < Length()) {
1466 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1467 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1468 pos++;
1472 return pos;
1476 * Check that the character at the given position is a word or punctuation character and that
1477 * the previous character is of a different character class.
1479 bool Document::IsWordStartAt(int pos) const {
1480 if (pos > 0) {
1481 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1482 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1483 (ccPos != WordCharClass(CharAt(pos - 1)));
1485 return true;
1489 * Check that the character at the given position is a word or punctuation character and that
1490 * the next character is of a different character class.
1492 bool Document::IsWordEndAt(int pos) const {
1493 if (pos < Length()) {
1494 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1495 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1496 (ccPrev != WordCharClass(CharAt(pos)));
1498 return true;
1502 * Check that the given range is has transitions between character classes at both
1503 * ends and where the characters on the inside are word or punctuation characters.
1505 bool Document::IsWordAt(int start, int end) const {
1506 return IsWordStartAt(start) && IsWordEndAt(end);
1509 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1510 return (!word && !wordStart) ||
1511 (word && IsWordAt(pos, pos + length)) ||
1512 (wordStart && IsWordStartAt(pos));
1515 bool Document::HasCaseFolder(void) const {
1516 return pcf != 0;
1519 void Document::SetCaseFolder(CaseFolder *pcf_) {
1520 delete pcf;
1521 pcf = pcf_;
1525 * Find text in document, supporting both forward and backward
1526 * searches (just pass minPos > maxPos to do a backward search)
1527 * Has not been tested with backwards DBCS searches yet.
// Search the document for `search`, whose length is passed in through *length.
// The search runs forward when minPos <= maxPos and backward otherwise.
// Returns minPos when *length <= 0, the position of the match when found, and -1
// when a non-regex search finds nothing. Regular expression searches return
// whatever the regex object's FindText returns.
// In the case-insensitive UTF-8 and DBCS branches *length is overwritten with the
// number of document bytes that matched.
1529 long Document::FindText(int minPos, int maxPos, const char *search,
1530 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1531 int *length) {
1532 if (*length <= 0)
1533 return minPos;
1534 if (regExp) {
// The regex search object is created on first use and then kept.
1535 if (!regex)
1536 regex = CreateRegexSearch(&charClass);
1537 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1538 } else {
1540 const bool forward = minPos <= maxPos;
1541 const int increment = forward ? 1 : -1;
1543 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1544 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1545 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1547 // Compute actual search ranges needed
1548 const int lengthFind = *length;
1550 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
// limitPos is the larger of the two range ends; no match may extend past it.
1551 const int limitPos = Platform::Maximum(startPos, endPos);
1552 int pos = startPos;
1553 if (!forward) {
1554 // Back all of a character
1555 pos = NextPosition(pos, increment);
// Branch 1: case sensitive - document bytes are compared directly with `search`.
1557 if (caseSensitive) {
// Searching forward, the last candidate start leaves room for lengthFind bytes before endPos.
1558 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1559 const char charStartSearch = search[0];
1560 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
// Test the first byte before comparing the remaining bytes.
1561 if (CharAt(pos) == charStartSearch) {
1562 bool found = (pos + lengthFind) <= limitPos;
1563 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1564 found = CharAt(pos + indexSearch) == search[indexSearch];
1566 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1567 return pos;
1570 if (!NextCharacter(pos, increment))
1571 break;
// Branch 2: case insensitive in a UTF-8 document. The search text is folded once with pcf;
// document characters are folded one at a time and compared with the folded search text.
// NOTE(review): pcf is dereferenced without a null check in the case-insensitive branches -
// presumably callers install a case folder first; confirm.
1573 } else if (SC_CP_UTF8 == dbcsCodePage) {
1574 const size_t maxFoldingExpansion = 4;
1575 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1576 const int lenSearch = static_cast<int>(
1577 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1578 char bytes[UTF8MaxBytes + 1];
1579 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1580 while (forward ? (pos < endPos) : (pos >= endPos)) {
1581 int widthFirstCharacter = 0;
1582 int posIndexDocument = pos;
1583 int indexSearch = 0;
1584 bool characterMatches = true;
// Fold and compare one document character per iteration until a mismatch,
// the range limit, or the end of the folded search text.
1585 for (;;) {
1586 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1587 bytes[0] = leadByte;
1588 int widthChar = 1;
1589 if (!UTF8IsAscii(leadByte)) {
1590 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1591 for (int b=1; b<widthCharBytes; b++) {
1592 bytes[b] = cb.CharAt(posIndexDocument+b);
1594 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
// Remember the width of the character at pos so a forward search can step over it whole.
1596 if (!widthFirstCharacter)
1597 widthFirstCharacter = widthChar;
1598 if ((posIndexDocument + widthChar) > limitPos)
1599 break;
1600 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1601 folded[lenFlat] = 0;
1602 // Does folded match the buffer
1603 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1604 if (!characterMatches)
1605 break;
1606 posIndexDocument += widthChar;
1607 indexSearch += lenFlat;
1608 if (indexSearch >= lenSearch)
1609 break;
1611 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1612 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
// Report the matched length in document bytes back through *length.
1613 *length = posIndexDocument - pos;
1614 return pos;
1617 if (forward) {
1618 pos += widthFirstCharacter;
1619 } else {
1620 if (!NextCharacter(pos, increment))
1621 break;
// Branch 3: case insensitive with another DBCS code page - characters are 1 or 2 bytes wide.
1624 } else if (dbcsCodePage) {
1625 const size_t maxBytesCharacter = 2;
1626 const size_t maxFoldingExpansion = 4;
1627 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1628 const int lenSearch = static_cast<int>(
1629 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1630 while (forward ? (pos < endPos) : (pos >= endPos)) {
1631 int indexDocument = 0;
1632 int indexSearch = 0;
1633 bool characterMatches = true;
1634 while (characterMatches &&
1635 ((pos + indexDocument) < limitPos) &&
1636 (indexSearch < lenSearch)) {
1637 char bytes[maxBytesCharacter + 1];
1638 bytes[0] = cb.CharAt(pos + indexDocument);
1639 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1640 if (widthChar == 2)
1641 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1642 if ((pos + indexDocument + widthChar) > limitPos)
1643 break;
1644 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1645 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1646 folded[lenFlat] = 0;
1647 // Does folded match the buffer
1648 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1649 indexDocument += widthChar;
1650 indexSearch += lenFlat;
1652 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1653 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
// Report the matched length in document bytes back through *length.
1654 *length = indexDocument;
1655 return pos;
1658 if (!NextCharacter(pos, increment))
1659 break;
// Branch 4: case insensitive without a DBCS code page - each byte is folded on its own.
1661 } else {
1662 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1663 std::vector<char> searchThing(lengthFind + 1);
1664 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1665 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1666 bool found = (pos + lengthFind) <= limitPos;
1667 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1668 char ch = CharAt(pos + indexSearch);
1669 char folded[2];
1670 pcf->Fold(folded, sizeof(folded), &ch, 1);
1671 found = folded[0] == searchThing[indexSearch];
1673 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1674 return pos;
1676 if (!NextCharacter(pos, increment))
1677 break;
1681 //Platform::DebugPrintf("Not found\n");
// No match was found in the requested range.
1682 return -1;
1685 const char *Document::SubstituteByPosition(const char *text, int *length) {
1686 if (regex)
1687 return regex->SubstituteByPosition(this, text, length);
1688 else
1689 return 0;
1692 int Document::LinesTotal() const {
1693 return cb.Lines();
1696 void Document::SetDefaultCharClasses(bool includeWordClass) {
1697 charClass.SetDefaultCharClasses(includeWordClass);
1700 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1701 charClass.SetCharClasses(chars, newCharClass);
1704 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1705 return charClass.GetCharsOfClass(characterClass, buffer);
1708 void Document::SetStylingBits(int bits) {
1709 stylingBits = bits;
1710 stylingBitsMask = (1 << stylingBits) - 1;
1713 void SCI_METHOD Document::StartStyling(int position, char mask) {
1714 stylingMask = mask;
1715 endStyled = position;
1718 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1719 if (enteredStyling != 0) {
1720 return false;
1721 } else {
1722 enteredStyling++;
1723 style &= stylingMask;
1724 int prevEndStyled = endStyled;
1725 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1726 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1727 prevEndStyled, length);
1728 NotifyModified(mh);
1730 endStyled += length;
1731 enteredStyling--;
1732 return true;
1736 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1737 if (enteredStyling != 0) {
1738 return false;
1739 } else {
1740 enteredStyling++;
1741 bool didChange = false;
1742 int startMod = 0;
1743 int endMod = 0;
1744 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1745 PLATFORM_ASSERT(endStyled < Length());
1746 if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1747 if (!didChange) {
1748 startMod = endStyled;
1750 didChange = true;
1751 endMod = endStyled;
1754 if (didChange) {
1755 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1756 startMod, endMod - startMod + 1);
1757 NotifyModified(mh);
1759 enteredStyling--;
1760 return true;
1764 void Document::EnsureStyledTo(int pos) {
1765 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1766 IncrementStyleClock();
1767 if (pli && !pli->UseContainerLexing()) {
1768 int lineEndStyled = LineFromPosition(GetEndStyled());
1769 int endStyledTo = LineStart(lineEndStyled);
1770 pli->Colourise(endStyledTo, pos);
1771 } else {
1772 // Ask the watchers to style, and stop as soon as one responds.
1773 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1774 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1775 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1781 void Document::LexerChanged() {
1782 // Tell the watchers the lexer has changed.
1783 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1784 it->watcher->NotifyLexerChanged(this, it->userData);
1788 int SCI_METHOD Document::SetLineState(int line, int state) {
1789 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1790 if (state != statePrevious) {
1791 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1792 NotifyModified(mh);
1794 return statePrevious;
1797 int SCI_METHOD Document::GetLineState(int line) const {
1798 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1801 int Document::GetMaxLineState() {
1802 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1805 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1806 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1807 NotifyModified(mh);
1810 StyledText Document::MarginStyledText(int line) const {
1811 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1812 return StyledText(pla->Length(line), pla->Text(line),
1813 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1816 void Document::MarginSetText(int line, const char *text) {
1817 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1818 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1819 NotifyModified(mh);
1822 void Document::MarginSetStyle(int line, int style) {
1823 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1824 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1827 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1828 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1829 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1832 void Document::MarginClearAll() {
1833 int maxEditorLine = LinesTotal();
1834 for (int l=0; l<maxEditorLine; l++)
1835 MarginSetText(l, 0);
1836 // Free remaining data
1837 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1840 StyledText Document::AnnotationStyledText(int line) const {
1841 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1842 return StyledText(pla->Length(line), pla->Text(line),
1843 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1846 void Document::AnnotationSetText(int line, const char *text) {
1847 if (line >= 0 && line < LinesTotal()) {
1848 const int linesBefore = AnnotationLines(line);
1849 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1850 const int linesAfter = AnnotationLines(line);
1851 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1852 mh.annotationLinesAdded = linesAfter - linesBefore;
1853 NotifyModified(mh);
1857 void Document::AnnotationSetStyle(int line, int style) {
1858 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1859 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1860 NotifyModified(mh);
1863 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1864 if (line >= 0 && line < LinesTotal()) {
1865 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1869 int Document::AnnotationLines(int line) const {
1870 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1873 void Document::AnnotationClearAll() {
1874 int maxEditorLine = LinesTotal();
1875 for (int l=0; l<maxEditorLine; l++)
1876 AnnotationSetText(l, 0);
1877 // Free remaining data
1878 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1881 void Document::IncrementStyleClock() {
1882 styleClock = (styleClock + 1) % 0x100000;
1885 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1886 if (decorations.FillRange(position, value, fillLength)) {
1887 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1888 position, fillLength);
1889 NotifyModified(mh);
1893 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1894 WatcherWithUserData wwud(watcher, userData);
1895 std::vector<WatcherWithUserData>::iterator it =
1896 std::find(watchers.begin(), watchers.end(), wwud);
1897 if (it != watchers.end())
1898 return false;
1899 watchers.push_back(wwud);
1900 return true;
1903 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1904 std::vector<WatcherWithUserData>::iterator it =
1905 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1906 if (it != watchers.end()) {
1907 watchers.erase(it);
1908 return true;
1910 return false;
1913 void Document::NotifyModifyAttempt() {
1914 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1915 it->watcher->NotifyModifyAttempt(this, it->userData);
1919 void Document::NotifySavePoint(bool atSavePoint) {
1920 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1921 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1925 void Document::NotifyModified(DocModification mh) {
1926 if (mh.modificationType & SC_MOD_INSERTTEXT) {
1927 decorations.InsertSpace(mh.position, mh.length);
1928 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1929 decorations.DeleteRange(mh.position, mh.length);
1931 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1932 it->watcher->NotifyModified(this, mh, it->userData);
1936 bool Document::IsWordPartSeparator(char ch) const {
1937 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1940 int Document::WordPartLeft(int pos) {
1941 if (pos > 0) {
1942 --pos;
1943 char startChar = cb.CharAt(pos);
1944 if (IsWordPartSeparator(startChar)) {
1945 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1946 --pos;
1949 if (pos > 0) {
1950 startChar = cb.CharAt(pos);
1951 --pos;
1952 if (IsLowerCase(startChar)) {
1953 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1954 --pos;
1955 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1956 ++pos;
1957 } else if (IsUpperCase(startChar)) {
1958 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1959 --pos;
1960 if (!IsUpperCase(cb.CharAt(pos)))
1961 ++pos;
1962 } else if (IsADigit(startChar)) {
1963 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1964 --pos;
1965 if (!IsADigit(cb.CharAt(pos)))
1966 ++pos;
1967 } else if (IsPunctuation(startChar)) {
1968 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1969 --pos;
1970 if (!IsPunctuation(cb.CharAt(pos)))
1971 ++pos;
1972 } else if (isspacechar(startChar)) {
1973 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1974 --pos;
1975 if (!isspacechar(cb.CharAt(pos)))
1976 ++pos;
1977 } else if (!IsASCII(startChar)) {
1978 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
1979 --pos;
1980 if (IsASCII(cb.CharAt(pos)))
1981 ++pos;
1982 } else {
1983 ++pos;
1987 return pos;
1990 int Document::WordPartRight(int pos) {
1991 char startChar = cb.CharAt(pos);
1992 int length = Length();
1993 if (IsWordPartSeparator(startChar)) {
1994 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1995 ++pos;
1996 startChar = cb.CharAt(pos);
1998 if (!IsASCII(startChar)) {
1999 while (pos < length && !IsASCII(cb.CharAt(pos)))
2000 ++pos;
2001 } else if (IsLowerCase(startChar)) {
2002 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2003 ++pos;
2004 } else if (IsUpperCase(startChar)) {
2005 if (IsLowerCase(cb.CharAt(pos + 1))) {
2006 ++pos;
2007 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2008 ++pos;
2009 } else {
2010 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2011 ++pos;
2013 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2014 --pos;
2015 } else if (IsADigit(startChar)) {
2016 while (pos < length && IsADigit(cb.CharAt(pos)))
2017 ++pos;
2018 } else if (IsPunctuation(startChar)) {
2019 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2020 ++pos;
2021 } else if (isspacechar(startChar)) {
2022 while (pos < length && isspacechar(cb.CharAt(pos)))
2023 ++pos;
2024 } else {
2025 ++pos;
2027 return pos;
/// True for a carriage return or a line feed.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2034 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2035 int sStart = cb.StyleAt(pos);
2036 if (delta < 0) {
2037 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2038 pos--;
2039 pos++;
2040 } else {
2041 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2042 pos++;
2044 return pos;
/// Return the partner of a brace character - one of ( ) [ ] { } < > - or
/// '\0' when ch is not a brace.
static char BraceOpposite(char ch) {
	// Partners sit next to each other, so flipping the low bit of the index
	// moves from one to the other.
	static const char braces[] = "()[]{}<>";
	for (int i = 0; braces[i] != '\0'; i++) {
		if (braces[i] == ch)
			return braces[i ^ 1];
	}
	return '\0';
}
2070 // TODO: should be able to extend styled region to find matching brace
2071 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2072 char chBrace = CharAt(position);
2073 char chSeek = BraceOpposite(chBrace);
2074 if (chSeek == '\0')
2075 return - 1;
2076 char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
2077 int direction = -1;
2078 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2079 direction = 1;
2080 int depth = 1;
2081 position = NextPosition(position, direction);
2082 while ((position >= 0) && (position < Length())) {
2083 char chAtPos = CharAt(position);
2084 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
2085 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2086 if (chAtPos == chBrace)
2087 depth++;
2088 if (chAtPos == chSeek)
2089 depth--;
2090 if (depth == 0)
2091 return position;
2093 int positionBeforeMove = position;
2094 position = NextPosition(position, direction);
2095 if (position == positionBeforeMove)
2096 break;
2098 return - 1;
2102 * Implementation of RegexSearchBase for the default built-in regular expression engine
2104 class BuiltinRegex : public RegexSearchBase {
2105 public:
2106 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2108 virtual ~BuiltinRegex() {
2111 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2112 bool caseSensitive, bool word, bool wordStart, int flags,
2113 int *length);
2115 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2117 private:
2118 RESearch search;
2119 std::string substituted;
2122 // Define a way for the Regular Expression code to access the document
2123 class DocumentIndexer : public CharacterIndexer {
2124 Document *pdoc;
2125 int end;
2126 public:
2127 DocumentIndexer(Document *pdoc_, int end_) :
2128 pdoc(pdoc_), end(end_) {
2131 virtual ~DocumentIndexer() {
2134 virtual char CharAt(int index) {
2135 if (index < 0 || index >= end)
2136 return 0;
2137 else
2138 return pdoc->CharAt(index);
2142 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2143 bool caseSensitive, bool, bool, int flags,
2144 int *length) {
2145 bool posix = (flags & SCFIND_POSIX) != 0;
2146 int increment = (minPos <= maxPos) ? 1 : -1;
2148 int startPos = minPos;
2149 int endPos = maxPos;
2151 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2152 startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2153 endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2155 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2156 if (errmsg) {
2157 return -1;
2159 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2160 // Replace first '.' with '-' in each property file variable reference:
2161 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2162 // Replace: $(\1-\2)
2163 int lineRangeStart = doc->LineFromPosition(startPos);
2164 int lineRangeEnd = doc->LineFromPosition(endPos);
2165 if ((increment == 1) &&
2166 (startPos >= doc->LineEnd(lineRangeStart)) &&
2167 (lineRangeStart < lineRangeEnd)) {
2168 // the start position is at end of line or between line end characters.
2169 lineRangeStart++;
2170 startPos = doc->LineStart(lineRangeStart);
2171 } else if ((increment == -1) &&
2172 (startPos <= doc->LineStart(lineRangeStart)) &&
2173 (lineRangeStart > lineRangeEnd)) {
2174 // the start position is at beginning of line.
2175 lineRangeStart--;
2176 startPos = doc->LineEnd(lineRangeStart);
2178 int pos = -1;
2179 int lenRet = 0;
2180 char searchEnd = s[*length - 1];
2181 char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2182 int lineRangeBreak = lineRangeEnd + increment;
2183 for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2184 int startOfLine = doc->LineStart(line);
2185 int endOfLine = doc->LineEnd(line);
2186 if (increment == 1) {
2187 if (line == lineRangeStart) {
2188 if ((startPos != startOfLine) && (s[0] == '^'))
2189 continue; // Can't match start of line if start position after start of line
2190 startOfLine = startPos;
2192 if (line == lineRangeEnd) {
2193 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2194 continue; // Can't match end of line if end position before end of line
2195 endOfLine = endPos;
2197 } else {
2198 if (line == lineRangeEnd) {
2199 if ((endPos != startOfLine) && (s[0] == '^'))
2200 continue; // Can't match start of line if end position after start of line
2201 startOfLine = endPos;
2203 if (line == lineRangeStart) {
2204 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2205 continue; // Can't match end of line if start position before end of line
2206 endOfLine = startPos;
2210 DocumentIndexer di(doc, endOfLine);
2211 int success = search.Execute(di, startOfLine, endOfLine);
2212 if (success) {
2213 pos = search.bopat[0];
2214 // Ensure only whole characters selected
2215 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2216 lenRet = search.eopat[0] - search.bopat[0];
2217 // There can be only one start of a line, so no need to look for last match in line
2218 if ((increment == -1) && (s[0] != '^')) {
2219 // Check for the last match on this line.
2220 int repetitions = 1000; // Break out of infinite loop
2221 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2222 success = search.Execute(di, pos+1, endOfLine);
2223 if (success) {
2224 if (search.eopat[0] <= minPos) {
2225 pos = search.bopat[0];
2226 lenRet = search.eopat[0] - search.bopat[0];
2227 } else {
2228 success = 0;
2233 break;
2236 *length = lenRet;
2237 return pos;
2240 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2241 substituted.clear();
2242 DocumentIndexer di(doc, doc->Length());
2243 search.GrabMatches(di);
2244 for (int j = 0; j < *length; j++) {
2245 if (text[j] == '\\') {
2246 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2247 unsigned int patNum = text[j + 1] - '0';
2248 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2249 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2250 substituted.append(search.pat[patNum].c_str(), len);
2251 j++;
2252 } else {
2253 j++;
2254 switch (text[j]) {
2255 case 'a':
2256 substituted.push_back('\a');
2257 break;
2258 case 'b':
2259 substituted.push_back('\b');
2260 break;
2261 case 'f':
2262 substituted.push_back('\f');
2263 break;
2264 case 'n':
2265 substituted.push_back('\n');
2266 break;
2267 case 'r':
2268 substituted.push_back('\r');
2269 break;
2270 case 't':
2271 substituted.push_back('\t');
2272 break;
2273 case 'v':
2274 substituted.push_back('\v');
2275 break;
2276 case '\\':
2277 substituted.push_back('\\');
2278 break;
2279 default:
2280 substituted.push_back('\\');
2281 j--;
2284 } else {
2285 substituted.push_back(text[j]);
2288 *length = static_cast<int>(substituted.length());
2289 return substituted.c_str();
2292 #ifndef SCI_OWNREGEX
2294 #ifdef SCI_NAMESPACE
2296 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2297 return new BuiltinRegex(charClassTable);
2300 #else
2302 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2303 return new BuiltinRegex(charClassTable);
2306 #endif
2308 #endif