Workaround cannot operate with keyboard when MenuButton has only 1 entry
[TortoiseGit.git] / ext / scintilla / src / Document.cxx
blobad2a183506b4ad43f16983c2b0c725c8de85ec98
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <assert.h>
12 #include <ctype.h>
14 #include <string>
15 #include <vector>
16 #include <algorithm>
18 #include "Platform.h"
20 #include "ILexer.h"
21 #include "Scintilla.h"
23 #include "CharacterSet.h"
24 #include "SplitVector.h"
25 #include "Partitioning.h"
26 #include "RunStyles.h"
27 #include "CellBuffer.h"
28 #include "PerLine.h"
29 #include "CharClassify.h"
30 #include "Decoration.h"
31 #include "CaseFolder.h"
32 #include "Document.h"
33 #include "RESearch.h"
34 #include "UniConversion.h"
36 #ifdef SCI_NAMESPACE
37 using namespace Scintilla;
38 #endif
40 static inline bool IsPunctuation(char ch) {
41 return IsASCII(ch) && ispunct(ch);
44 void LexInterface::Colourise(int start, int end) {
45 if (pdoc && instance && !performingStyle) {
46 // Protect against reentrance, which may occur, for example, when
47 // fold points are discovered while performing styling and the folding
48 // code looks for child lines which may trigger styling.
49 performingStyle = true;
51 int lengthDoc = pdoc->Length();
52 if (end == -1)
53 end = lengthDoc;
54 int len = end - start;
56 PLATFORM_ASSERT(len >= 0);
57 PLATFORM_ASSERT(start + len <= lengthDoc);
59 int styleStart = 0;
60 if (start > 0)
61 styleStart = pdoc->StyleAt(start - 1);
63 if (len > 0) {
64 instance->Lex(start, len, styleStart, pdoc);
65 instance->Fold(start, len, styleStart, pdoc);
68 performingStyle = false;
72 int LexInterface::LineEndTypesSupported() {
73 if (instance) {
74 int interfaceVersion = instance->Version();
75 if (interfaceVersion >= lvSubStyles) {
76 ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
77 return ssinstance->LineEndTypesSupported();
80 return 0;
83 Document::Document() {
84 refCount = 0;
85 pcf = NULL;
86 #ifdef _WIN32
87 eolMode = SC_EOL_CRLF;
88 #else
89 eolMode = SC_EOL_LF;
90 #endif
91 dbcsCodePage = 0;
92 lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
93 endStyled = 0;
94 styleClock = 0;
95 enteredModification = 0;
96 enteredStyling = 0;
97 enteredReadOnlyCount = 0;
98 insertionSet = false;
99 tabInChars = 8;
100 indentInChars = 0;
101 actualIndentInChars = 8;
102 useTabs = true;
103 tabIndents = true;
104 backspaceUnindents = false;
106 matchesValid = false;
107 regex = 0;
109 UTF8BytesOfLeadInitialise();
111 perLineData[ldMarkers] = new LineMarkers();
112 perLineData[ldLevels] = new LineLevels();
113 perLineData[ldState] = new LineState();
114 perLineData[ldMargin] = new LineAnnotation();
115 perLineData[ldAnnotation] = new LineAnnotation();
117 cb.SetPerLine(this);
119 pli = 0;
122 Document::~Document() {
123 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
124 it->watcher->NotifyDeleted(this, it->userData);
126 for (int j=0; j<ldSize; j++) {
127 delete perLineData[j];
128 perLineData[j] = 0;
130 delete regex;
131 regex = 0;
132 delete pli;
133 pli = 0;
134 delete pcf;
135 pcf = 0;
138 void Document::Init() {
139 for (int j=0; j<ldSize; j++) {
140 if (perLineData[j])
141 perLineData[j]->Init();
145 int Document::LineEndTypesSupported() const {
146 if ((SC_CP_UTF8 == dbcsCodePage) && pli)
147 return pli->LineEndTypesSupported();
148 else
149 return 0;
152 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
153 if (dbcsCodePage != dbcsCodePage_) {
154 dbcsCodePage = dbcsCodePage_;
155 SetCaseFolder(NULL);
156 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
157 return true;
158 } else {
159 return false;
163 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
164 if (lineEndBitSet != lineEndBitSet_) {
165 lineEndBitSet = lineEndBitSet_;
166 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
167 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
168 ModifiedAt(0);
169 cb.SetLineEndTypes(lineEndBitSetActive);
170 return true;
171 } else {
172 return false;
174 } else {
175 return false;
179 void Document::InsertLine(int line) {
180 for (int j=0; j<ldSize; j++) {
181 if (perLineData[j])
182 perLineData[j]->InsertLine(line);
186 void Document::RemoveLine(int line) {
187 for (int j=0; j<ldSize; j++) {
188 if (perLineData[j])
189 perLineData[j]->RemoveLine(line);
193 // Increase reference count and return its previous value.
194 int Document::AddRef() {
195 return refCount++;
198 // Decrease reference count and return its previous value.
199 // Delete the document if reference count reaches zero.
200 int SCI_METHOD Document::Release() {
201 int curRefCount = --refCount;
202 if (curRefCount == 0)
203 delete this;
204 return curRefCount;
207 void Document::SetSavePoint() {
208 cb.SetSavePoint();
209 NotifySavePoint(true);
212 int Document::GetMark(int line) {
213 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
216 int Document::MarkerNext(int lineStart, int mask) const {
217 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
220 int Document::AddMark(int line, int markerNum) {
221 if (line >= 0 && line <= LinesTotal()) {
222 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
223 AddMark(line, markerNum, LinesTotal());
224 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
225 NotifyModified(mh);
226 return prev;
227 } else {
228 return 0;
232 void Document::AddMarkSet(int line, int valueSet) {
233 if (line < 0 || line > LinesTotal()) {
234 return;
236 unsigned int m = valueSet;
237 for (int i = 0; m; i++, m >>= 1)
238 if (m & 1)
239 static_cast<LineMarkers *>(perLineData[ldMarkers])->
240 AddMark(line, i, LinesTotal());
241 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
242 NotifyModified(mh);
245 void Document::DeleteMark(int line, int markerNum) {
246 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
247 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
248 NotifyModified(mh);
251 void Document::DeleteMarkFromHandle(int markerHandle) {
252 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
253 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
254 mh.line = -1;
255 NotifyModified(mh);
258 void Document::DeleteAllMarks(int markerNum) {
259 bool someChanges = false;
260 for (int line = 0; line < LinesTotal(); line++) {
261 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
262 someChanges = true;
264 if (someChanges) {
265 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
266 mh.line = -1;
267 NotifyModified(mh);
271 int Document::LineFromHandle(int markerHandle) {
272 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
275 int SCI_METHOD Document::LineStart(int line) const {
276 return cb.LineStart(line);
279 int SCI_METHOD Document::LineEnd(int line) const {
280 if (line >= LinesTotal() - 1) {
281 return LineStart(line + 1);
282 } else {
283 int position = LineStart(line + 1);
284 if (SC_CP_UTF8 == dbcsCodePage) {
285 unsigned char bytes[] = {
286 static_cast<unsigned char>(cb.CharAt(position-3)),
287 static_cast<unsigned char>(cb.CharAt(position-2)),
288 static_cast<unsigned char>(cb.CharAt(position-1)),
290 if (UTF8IsSeparator(bytes)) {
291 return position - UTF8SeparatorLength;
293 if (UTF8IsNEL(bytes+1)) {
294 return position - UTF8NELLength;
297 position--; // Back over CR or LF
298 // When line terminator is CR+LF, may need to go back one more
299 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
300 position--;
302 return position;
306 void SCI_METHOD Document::SetErrorStatus(int status) {
307 // Tell the watchers an error has occurred.
308 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
309 it->watcher->NotifyErrorOccurred(this, it->userData, status);
313 int SCI_METHOD Document::LineFromPosition(int pos) const {
314 return cb.LineFromPosition(pos);
317 int Document::LineEndPosition(int position) const {
318 return LineEnd(LineFromPosition(position));
321 bool Document::IsLineEndPosition(int position) const {
322 return LineEnd(LineFromPosition(position)) == position;
325 bool Document::IsPositionInLineEnd(int position) const {
326 return position >= LineEnd(LineFromPosition(position));
329 int Document::VCHomePosition(int position) const {
330 int line = LineFromPosition(position);
331 int startPosition = LineStart(line);
332 int endLine = LineEnd(line);
333 int startText = startPosition;
334 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
335 startText++;
336 if (position == startText)
337 return startPosition;
338 else
339 return startText;
342 int SCI_METHOD Document::SetLevel(int line, int level) {
343 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
344 if (prev != level) {
345 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
346 LineStart(line), 0, 0, 0, line);
347 mh.foldLevelNow = level;
348 mh.foldLevelPrev = prev;
349 NotifyModified(mh);
351 return prev;
354 int SCI_METHOD Document::GetLevel(int line) const {
355 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
358 void Document::ClearLevels() {
359 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
362 static bool IsSubordinate(int levelStart, int levelTry) {
363 if (levelTry & SC_FOLDLEVELWHITEFLAG)
364 return true;
365 else
366 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
369 int Document::GetLastChild(int lineParent, int level, int lastLine) {
370 if (level == -1)
371 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
372 int maxLine = LinesTotal();
373 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
374 int lineMaxSubord = lineParent;
375 while (lineMaxSubord < maxLine - 1) {
376 EnsureStyledTo(LineStart(lineMaxSubord + 2));
377 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
378 break;
379 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
380 break;
381 lineMaxSubord++;
383 if (lineMaxSubord > lineParent) {
384 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
385 // Have chewed up some whitespace that belongs to a parent so seek back
386 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
387 lineMaxSubord--;
391 return lineMaxSubord;
394 int Document::GetFoldParent(int line) const {
395 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
396 int lineLook = line - 1;
397 while ((lineLook > 0) && (
398 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
399 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
401 lineLook--;
403 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
404 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
405 return lineLook;
406 } else {
407 return -1;
411 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
412 int level = GetLevel(line);
413 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
415 int lookLine = line;
416 int lookLineLevel = level;
417 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
418 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
419 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
420 lookLineLevel = GetLevel(--lookLine);
421 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
424 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
425 if (beginFoldBlock == -1) {
426 highlightDelimiter.Clear();
427 return;
430 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
431 int firstChangeableLineBefore = -1;
432 if (endFoldBlock < line) {
433 lookLine = beginFoldBlock - 1;
434 lookLineLevel = GetLevel(lookLine);
435 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
436 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
437 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
438 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
439 beginFoldBlock = lookLine;
440 endFoldBlock = line;
441 firstChangeableLineBefore = line - 1;
444 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
445 break;
446 lookLineLevel = GetLevel(--lookLine);
447 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
450 if (firstChangeableLineBefore == -1) {
451 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
452 lookLine >= beginFoldBlock;
453 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
454 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
455 firstChangeableLineBefore = lookLine;
456 break;
460 if (firstChangeableLineBefore == -1)
461 firstChangeableLineBefore = beginFoldBlock - 1;
463 int firstChangeableLineAfter = -1;
464 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
465 lookLine <= endFoldBlock;
466 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
467 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
468 firstChangeableLineAfter = lookLine;
469 break;
472 if (firstChangeableLineAfter == -1)
473 firstChangeableLineAfter = endFoldBlock + 1;
475 highlightDelimiter.beginFoldBlock = beginFoldBlock;
476 highlightDelimiter.endFoldBlock = endFoldBlock;
477 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
478 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
481 int Document::ClampPositionIntoDocument(int pos) const {
482 return Platform::Clamp(pos, 0, Length());
485 bool Document::IsCrLf(int pos) const {
486 if (pos < 0)
487 return false;
488 if (pos >= (Length() - 1))
489 return false;
490 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
493 int Document::LenChar(int pos) {
494 if (pos < 0) {
495 return 1;
496 } else if (IsCrLf(pos)) {
497 return 2;
498 } else if (SC_CP_UTF8 == dbcsCodePage) {
499 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
500 const int widthCharBytes = UTF8BytesOfLead[leadByte];
501 int lengthDoc = Length();
502 if ((pos + widthCharBytes) > lengthDoc)
503 return lengthDoc - pos;
504 else
505 return widthCharBytes;
506 } else if (dbcsCodePage) {
507 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
508 } else {
509 return 1;
513 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
514 int trail = pos;
515 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
516 trail--;
517 start = (trail > 0) ? trail-1 : trail;
519 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
520 const int widthCharBytes = UTF8BytesOfLead[leadByte];
521 if (widthCharBytes == 1) {
522 return false;
523 } else {
524 int trailBytes = widthCharBytes - 1;
525 int len = pos - start;
526 if (len > trailBytes)
527 // pos too far from lead
528 return false;
529 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
530 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
531 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
532 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
533 if (utf8status & UTF8MaskInvalid)
534 return false;
535 end = start + widthCharBytes;
536 return true;
540 // Normalise a position so that it is not halfway through a two byte character.
541 // This can occur in two situations -
542 // When lines are terminated with \r\n pairs which should be treated as one character.
543 // When displaying DBCS text such as Japanese.
544 // If moving, move the position in the indicated direction.
545 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
546 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
547 // If out of range, just return minimum/maximum value.
548 if (pos <= 0)
549 return 0;
550 if (pos >= Length())
551 return Length();
553 // PLATFORM_ASSERT(pos > 0 && pos < Length());
554 if (checkLineEnd && IsCrLf(pos - 1)) {
555 if (moveDir > 0)
556 return pos + 1;
557 else
558 return pos - 1;
561 if (dbcsCodePage) {
562 if (SC_CP_UTF8 == dbcsCodePage) {
563 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
564 // If ch is not a trail byte then pos is valid intercharacter position
565 if (UTF8IsTrailByte(ch)) {
566 int startUTF = pos;
567 int endUTF = pos;
568 if (InGoodUTF8(pos, startUTF, endUTF)) {
569 // ch is a trail byte within a UTF-8 character
570 if (moveDir > 0)
571 pos = endUTF;
572 else
573 pos = startUTF;
575 // Else invalid UTF-8 so return position of isolated trail byte
577 } else {
578 // Anchor DBCS calculations at start of line because start of line can
579 // not be a DBCS trail byte.
580 int posStartLine = LineStart(LineFromPosition(pos));
581 if (pos == posStartLine)
582 return pos;
584 // Step back until a non-lead-byte is found.
585 int posCheck = pos;
586 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
587 posCheck--;
589 // Check from known start of character.
590 while (posCheck < pos) {
591 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
592 if (posCheck + mbsize == pos) {
593 return pos;
594 } else if (posCheck + mbsize > pos) {
595 if (moveDir > 0) {
596 return posCheck + mbsize;
597 } else {
598 return posCheck;
601 posCheck += mbsize;
606 return pos;
609 // NextPosition moves between valid positions - it can not handle a position in the middle of a
610 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
611 // A \r\n pair is treated as two characters.
612 int Document::NextPosition(int pos, int moveDir) const {
613 // If out of range, just return minimum/maximum value.
614 int increment = (moveDir > 0) ? 1 : -1;
615 if (pos + increment <= 0)
616 return 0;
617 if (pos + increment >= Length())
618 return Length();
620 if (dbcsCodePage) {
621 if (SC_CP_UTF8 == dbcsCodePage) {
622 if (increment == 1) {
623 // Simple forward movement case so can avoid some checks
624 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
625 if (UTF8IsAscii(leadByte)) {
626 // Single byte character or invalid
627 pos++;
628 } else {
629 const int widthCharBytes = UTF8BytesOfLead[leadByte];
630 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
631 for (int b=1; b<widthCharBytes; b++)
632 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
633 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
634 if (utf8status & UTF8MaskInvalid)
635 pos++;
636 else
637 pos += utf8status & UTF8MaskWidth;
639 } else {
640 // Examine byte before position
641 pos--;
642 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
643 // If ch is not a trail byte then pos is valid intercharacter position
644 if (UTF8IsTrailByte(ch)) {
645 // If ch is a trail byte in a valid UTF-8 character then return start of character
646 int startUTF = pos;
647 int endUTF = pos;
648 if (InGoodUTF8(pos, startUTF, endUTF)) {
649 pos = startUTF;
651 // Else invalid UTF-8 so return position of isolated trail byte
654 } else {
655 if (moveDir > 0) {
656 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
657 pos += mbsize;
658 if (pos > Length())
659 pos = Length();
660 } else {
661 // Anchor DBCS calculations at start of line because start of line can
662 // not be a DBCS trail byte.
663 int posStartLine = LineStart(LineFromPosition(pos));
664 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
665 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
666 if ((pos - 1) <= posStartLine) {
667 return pos - 1;
668 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
669 // Must actually be trail byte
670 return pos - 2;
671 } else {
672 // Otherwise, step back until a non-lead-byte is found.
673 int posTemp = pos - 1;
674 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
676 // Now posTemp+1 must point to the beginning of a character,
677 // so figure out whether we went back an even or an odd
678 // number of bytes and go back 1 or 2 bytes, respectively.
679 return (pos - 1 - ((pos - posTemp) & 1));
683 } else {
684 pos += increment;
687 return pos;
690 bool Document::NextCharacter(int &pos, int moveDir) const {
691 // Returns true if pos changed
692 int posNext = NextPosition(pos, moveDir);
693 if (posNext == pos) {
694 return false;
695 } else {
696 pos = posNext;
697 return true;
// Decode one UTF-8 sequence starting at us into a code point.
// The lead byte selects the width: below 0xC2 the byte itself is returned,
// 0xC2-0xDF reads 2 bytes, 0xE0-0xEF reads 3, 0xF0-0xF4 reads 4.
// Lead bytes of 0xF5 and above are returned unchanged.
// No validation of trail bytes is performed; us must have as many readable
// bytes as the lead byte implies.
static inline int UnicodeFromBytes(const unsigned char *us) {
	if (us[0] < 0xC2) {
		return us[0];
	} else if (us[0] < 0xE0) {
		return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
	} else if (us[0] < 0xF0) {
		return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
	} else if (us[0] < 0xF5) {
		return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
	}
	return us[0];
}
714 // Return -1 on out-of-bounds
715 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
716 int pos = positionStart;
717 if (dbcsCodePage) {
718 const int increment = (characterOffset > 0) ? 1 : -1;
719 while (characterOffset != 0) {
720 const int posNext = NextPosition(pos, increment);
721 if (posNext == pos)
722 return INVALID_POSITION;
723 pos = posNext;
724 characterOffset -= increment;
726 } else {
727 pos = positionStart + characterOffset;
728 if ((pos < 0) || (pos > Length()))
729 return INVALID_POSITION;
731 return pos;
734 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
735 int character;
736 int bytesInCharacter = 1;
737 if (dbcsCodePage) {
738 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
739 if (SC_CP_UTF8 == dbcsCodePage) {
740 if (UTF8IsAscii(leadByte)) {
741 // Single byte character or invalid
742 character = leadByte;
743 } else {
744 const int widthCharBytes = UTF8BytesOfLead[leadByte];
745 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
746 for (int b=1; b<widthCharBytes; b++)
747 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
748 int utf8status = UTF8Classify(charBytes, widthCharBytes);
749 if (utf8status & UTF8MaskInvalid) {
750 // Report as singleton surrogate values which are invalid Unicode
751 character = 0xDC80 + leadByte;
752 } else {
753 bytesInCharacter = utf8status & UTF8MaskWidth;
754 character = UnicodeFromBytes(charBytes);
757 } else {
758 if (IsDBCSLeadByte(leadByte)) {
759 bytesInCharacter = 2;
760 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
761 } else {
762 character = leadByte;
765 } else {
766 character = cb.CharAt(position);
768 if (pWidth) {
769 *pWidth = bytesInCharacter;
771 return character;
774 int SCI_METHOD Document::CodePage() const {
775 return dbcsCodePage;
778 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
779 // Byte ranges found in Wikipedia articles with relevant search strings in each case
780 unsigned char uch = static_cast<unsigned char>(ch);
781 switch (dbcsCodePage) {
782 case 932:
783 // Shift_jis
784 return ((uch >= 0x81) && (uch <= 0x9F)) ||
785 ((uch >= 0xE0) && (uch <= 0xFC));
786 // Lead bytes F0 to FC may be a Microsoft addition.
787 case 936:
788 // GBK
789 return (uch >= 0x81) && (uch <= 0xFE);
790 case 949:
791 // Korean Wansung KS C-5601-1987
792 return (uch >= 0x81) && (uch <= 0xFE);
793 case 950:
794 // Big5
795 return (uch >= 0x81) && (uch <= 0xFE);
796 case 1361:
797 // Korean Johab KS C-5601-1992
798 return
799 ((uch >= 0x84) && (uch <= 0xD3)) ||
800 ((uch >= 0xD8) && (uch <= 0xDE)) ||
801 ((uch >= 0xE0) && (uch <= 0xF9));
803 return false;
// True when ch is a space or a horizontal tab.
static inline bool IsSpaceOrTab(int ch) {
	return ch == ' ' || ch == '\t';
}
810 // Need to break text into segments near lengthSegment but taking into
811 // account the encoding to not break inside a UTF-8 or DBCS character
812 // and also trying to avoid breaking inside a pair of combining characters.
813 // The segment length must always be long enough (more than 4 bytes)
814 // so that there will be at least one whole character to make a segment.
815 // For UTF-8, text must consist only of valid whole characters.
816 // In preference order from best to worst:
817 // 1) Break after space
818 // 2) Break before punctuation
819 // 3) Break after whole character
821 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
822 if (length <= lengthSegment)
823 return length;
824 int lastSpaceBreak = -1;
825 int lastPunctuationBreak = -1;
826 int lastEncodingAllowedBreak = 0;
827 for (int j=0; j < lengthSegment;) {
828 unsigned char ch = static_cast<unsigned char>(text[j]);
829 if (j > 0) {
830 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
831 lastSpaceBreak = j;
833 if (ch < 'A') {
834 lastPunctuationBreak = j;
837 lastEncodingAllowedBreak = j;
839 if (dbcsCodePage == SC_CP_UTF8) {
840 j += UTF8BytesOfLead[ch];
841 } else if (dbcsCodePage) {
842 j += IsDBCSLeadByte(ch) ? 2 : 1;
843 } else {
844 j++;
847 if (lastSpaceBreak >= 0) {
848 return lastSpaceBreak;
849 } else if (lastPunctuationBreak >= 0) {
850 return lastPunctuationBreak;
852 return lastEncodingAllowedBreak;
855 EncodingFamily Document::CodePageFamily() const {
856 if (SC_CP_UTF8 == dbcsCodePage)
857 return efUnicode;
858 else if (dbcsCodePage)
859 return efDBCS;
860 else
861 return efEightBit;
864 void Document::ModifiedAt(int pos) {
865 if (endStyled > pos)
866 endStyled = pos;
869 void Document::CheckReadOnly() {
870 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
871 enteredReadOnlyCount++;
872 NotifyModifyAttempt();
873 enteredReadOnlyCount--;
877 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
878 // SetStyleAt does not change the persistent state of a document
880 bool Document::DeleteChars(int pos, int len) {
881 if (len <= 0)
882 return false;
883 if ((pos + len) > Length())
884 return false;
885 CheckReadOnly();
886 if (enteredModification != 0) {
887 return false;
888 } else {
889 enteredModification++;
890 if (!cb.IsReadOnly()) {
891 NotifyModified(
892 DocModification(
893 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
894 pos, len,
895 0, 0));
896 int prevLinesTotal = LinesTotal();
897 bool startSavePoint = cb.IsSavePoint();
898 bool startSequence = false;
899 const char *text = cb.DeleteChars(pos, len, startSequence);
900 if (startSavePoint && cb.IsCollectingUndo())
901 NotifySavePoint(!startSavePoint);
902 if ((pos < Length()) || (pos == 0))
903 ModifiedAt(pos);
904 else
905 ModifiedAt(pos-1);
906 NotifyModified(
907 DocModification(
908 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
909 pos, len,
910 LinesTotal() - prevLinesTotal, text));
912 enteredModification--;
914 return !cb.IsReadOnly();
918 * Insert a string with a length.
920 int Document::InsertString(int position, const char *s, int insertLength) {
921 if (insertLength <= 0) {
922 return 0;
924 CheckReadOnly(); // Application may change read only state here
925 if (cb.IsReadOnly()) {
926 return 0;
928 if (enteredModification != 0) {
929 return 0;
931 enteredModification++;
932 insertionSet = false;
933 insertion.clear();
934 NotifyModified(
935 DocModification(
936 SC_MOD_INSERTCHECK,
937 position, insertLength,
938 0, s));
939 if (insertionSet) {
940 s = insertion.c_str();
941 insertLength = static_cast<int>(insertion.length());
943 NotifyModified(
944 DocModification(
945 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
946 position, insertLength,
947 0, s));
948 int prevLinesTotal = LinesTotal();
949 bool startSavePoint = cb.IsSavePoint();
950 bool startSequence = false;
951 const char *text = cb.InsertString(position, s, insertLength, startSequence);
952 if (startSavePoint && cb.IsCollectingUndo())
953 NotifySavePoint(!startSavePoint);
954 ModifiedAt(position);
955 NotifyModified(
956 DocModification(
957 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
958 position, insertLength,
959 LinesTotal() - prevLinesTotal, text));
960 if (insertionSet) { // Free memory as could be large
961 std::string().swap(insertion);
963 enteredModification--;
964 return insertLength;
967 void Document::ChangeInsertion(const char *s, int length) {
968 insertionSet = true;
969 insertion.assign(s, length);
972 int SCI_METHOD Document::AddData(char *data, int length) {
973 try {
974 int position = Length();
975 InsertString(position, data, length);
976 } catch (std::bad_alloc &) {
977 return SC_STATUS_BADALLOC;
978 } catch (...) {
979 return SC_STATUS_FAILURE;
981 return 0;
984 void * SCI_METHOD Document::ConvertToDocument() {
985 return this;
988 int Document::Undo() {
989 int newPos = -1;
990 CheckReadOnly();
991 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
992 enteredModification++;
993 if (!cb.IsReadOnly()) {
994 bool startSavePoint = cb.IsSavePoint();
995 bool multiLine = false;
996 int steps = cb.StartUndo();
997 //Platform::DebugPrintf("Steps=%d\n", steps);
998 int coalescedRemovePos = -1;
999 int coalescedRemoveLen = 0;
1000 int prevRemoveActionPos = -1;
1001 int prevRemoveActionLen = 0;
1002 for (int step = 0; step < steps; step++) {
1003 const int prevLinesTotal = LinesTotal();
1004 const Action &action = cb.GetUndoStep();
1005 if (action.at == removeAction) {
1006 NotifyModified(DocModification(
1007 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1008 } else if (action.at == containerAction) {
1009 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1010 dm.token = action.position;
1011 NotifyModified(dm);
1012 if (!action.mayCoalesce) {
1013 coalescedRemovePos = -1;
1014 coalescedRemoveLen = 0;
1015 prevRemoveActionPos = -1;
1016 prevRemoveActionLen = 0;
1018 } else {
1019 NotifyModified(DocModification(
1020 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1022 cb.PerformUndoStep();
1023 if (action.at != containerAction) {
1024 ModifiedAt(action.position);
1025 newPos = action.position;
1028 int modFlags = SC_PERFORMED_UNDO;
1029 // With undo, an insertion action becomes a deletion notification
1030 if (action.at == removeAction) {
1031 newPos += action.lenData;
1032 modFlags |= SC_MOD_INSERTTEXT;
1033 if ((coalescedRemoveLen > 0) &&
1034 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1035 coalescedRemoveLen += action.lenData;
1036 newPos = coalescedRemovePos + coalescedRemoveLen;
1037 } else {
1038 coalescedRemovePos = action.position;
1039 coalescedRemoveLen = action.lenData;
1041 prevRemoveActionPos = action.position;
1042 prevRemoveActionLen = action.lenData;
1043 } else if (action.at == insertAction) {
1044 modFlags |= SC_MOD_DELETETEXT;
1045 coalescedRemovePos = -1;
1046 coalescedRemoveLen = 0;
1047 prevRemoveActionPos = -1;
1048 prevRemoveActionLen = 0;
1050 if (steps > 1)
1051 modFlags |= SC_MULTISTEPUNDOREDO;
1052 const int linesAdded = LinesTotal() - prevLinesTotal;
1053 if (linesAdded != 0)
1054 multiLine = true;
1055 if (step == steps - 1) {
1056 modFlags |= SC_LASTSTEPINUNDOREDO;
1057 if (multiLine)
1058 modFlags |= SC_MULTILINEUNDOREDO;
1060 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1061 linesAdded, action.data));
1064 bool endSavePoint = cb.IsSavePoint();
1065 if (startSavePoint != endSavePoint)
1066 NotifySavePoint(endSavePoint);
1068 enteredModification--;
1070 return newPos;
1073 int Document::Redo() {
1074 int newPos = -1;
1075 CheckReadOnly();
1076 if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1077 enteredModification++;
1078 if (!cb.IsReadOnly()) {
1079 bool startSavePoint = cb.IsSavePoint();
1080 bool multiLine = false;
1081 int steps = cb.StartRedo();
1082 for (int step = 0; step < steps; step++) {
1083 const int prevLinesTotal = LinesTotal();
1084 const Action &action = cb.GetRedoStep();
1085 if (action.at == insertAction) {
1086 NotifyModified(DocModification(
1087 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1088 } else if (action.at == containerAction) {
1089 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1090 dm.token = action.position;
1091 NotifyModified(dm);
1092 } else {
1093 NotifyModified(DocModification(
1094 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1096 cb.PerformRedoStep();
1097 if (action.at != containerAction) {
1098 ModifiedAt(action.position);
1099 newPos = action.position;
1102 int modFlags = SC_PERFORMED_REDO;
1103 if (action.at == insertAction) {
1104 newPos += action.lenData;
1105 modFlags |= SC_MOD_INSERTTEXT;
1106 } else if (action.at == removeAction) {
1107 modFlags |= SC_MOD_DELETETEXT;
1109 if (steps > 1)
1110 modFlags |= SC_MULTISTEPUNDOREDO;
1111 const int linesAdded = LinesTotal() - prevLinesTotal;
1112 if (linesAdded != 0)
1113 multiLine = true;
1114 if (step == steps - 1) {
1115 modFlags |= SC_LASTSTEPINUNDOREDO;
1116 if (multiLine)
1117 modFlags |= SC_MULTILINEUNDOREDO;
1119 NotifyModified(
1120 DocModification(modFlags, action.position, action.lenData,
1121 linesAdded, action.data));
1124 bool endSavePoint = cb.IsSavePoint();
1125 if (startSavePoint != endSavePoint)
1126 NotifySavePoint(endSavePoint);
1128 enteredModification--;
1130 return newPos;
1133 void Document::DelChar(int pos) {
1134 DeleteChars(pos, LenChar(pos));
1137 void Document::DelCharBack(int pos) {
1138 if (pos <= 0) {
1139 return;
1140 } else if (IsCrLf(pos - 2)) {
1141 DeleteChars(pos - 2, 2);
1142 } else if (dbcsCodePage) {
1143 int startChar = NextPosition(pos, -1);
1144 DeleteChars(startChar, pos - startChar);
1145 } else {
1146 DeleteChars(pos - 1, 1);
/// Return the first tab stop strictly after column pos, for stops every tabSize columns.
static int NextTab(int pos, int tabSize) {
	const int intoCurrentStop = pos % tabSize;
	return pos - intoCurrentStop + tabSize;
}
/**
 * Build the white space that produces an indentation of @a indent columns.
 * @param indent        wanted width in columns; values <= 0 give an empty string.
 * @param tabSize       columns per tab stop.
 * @param insertSpaces  when true only spaces are used; otherwise as many tabs
 *                      as fit, padded with spaces.
 * @return the indentation string.
 *
 * A non-positive tabSize cannot be expressed in tabs (and made the original
 * tab loop never terminate), so it falls back to spaces only.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	if (indent <= 0)
		return std::string();

	int tabCount = 0;
	if (!insertSpaces && (tabSize > 0))
		tabCount = indent / tabSize;
	const int spaceCount = indent - tabCount * tabSize;

	std::string indentation(static_cast<size_t>(tabCount), '\t');
	indentation.append(static_cast<size_t>(spaceCount), ' ');
	return indentation;
}
1169 int SCI_METHOD Document::GetLineIndentation(int line) {
1170 int indent = 0;
1171 if ((line >= 0) && (line < LinesTotal())) {
1172 int lineStart = LineStart(line);
1173 int length = Length();
1174 for (int i = lineStart; i < length; i++) {
1175 char ch = cb.CharAt(i);
1176 if (ch == ' ')
1177 indent++;
1178 else if (ch == '\t')
1179 indent = NextTab(indent, tabInChars);
1180 else
1181 return indent;
1184 return indent;
1187 int Document::SetLineIndentation(int line, int indent) {
1188 int indentOfLine = GetLineIndentation(line);
1189 if (indent < 0)
1190 indent = 0;
1191 if (indent != indentOfLine) {
1192 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1193 int thisLineStart = LineStart(line);
1194 int indentPos = GetLineIndentPosition(line);
1195 UndoGroup ug(this);
1196 DeleteChars(thisLineStart, indentPos - thisLineStart);
1197 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1198 static_cast<int>(linebuf.length()));
1199 } else {
1200 return GetLineIndentPosition(line);
1204 int Document::GetLineIndentPosition(int line) const {
1205 if (line < 0)
1206 return 0;
1207 int pos = LineStart(line);
1208 int length = Length();
1209 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1210 pos++;
1212 return pos;
1215 int Document::GetColumn(int pos) {
1216 int column = 0;
1217 int line = LineFromPosition(pos);
1218 if ((line >= 0) && (line < LinesTotal())) {
1219 for (int i = LineStart(line); i < pos;) {
1220 char ch = cb.CharAt(i);
1221 if (ch == '\t') {
1222 column = NextTab(column, tabInChars);
1223 i++;
1224 } else if (ch == '\r') {
1225 return column;
1226 } else if (ch == '\n') {
1227 return column;
1228 } else if (i >= Length()) {
1229 return column;
1230 } else {
1231 column++;
1232 i = NextPosition(i, 1);
1236 return column;
1239 int Document::CountCharacters(int startPos, int endPos) {
1240 startPos = MovePositionOutsideChar(startPos, 1, false);
1241 endPos = MovePositionOutsideChar(endPos, -1, false);
1242 int count = 0;
1243 int i = startPos;
1244 while (i < endPos) {
1245 count++;
1246 if (IsCrLf(i))
1247 i++;
1248 i = NextPosition(i, 1);
1250 return count;
1253 int Document::FindColumn(int line, int column) {
1254 int position = LineStart(line);
1255 if ((line >= 0) && (line < LinesTotal())) {
1256 int columnCurrent = 0;
1257 while ((columnCurrent < column) && (position < Length())) {
1258 char ch = cb.CharAt(position);
1259 if (ch == '\t') {
1260 columnCurrent = NextTab(columnCurrent, tabInChars);
1261 if (columnCurrent > column)
1262 return position;
1263 position++;
1264 } else if (ch == '\r') {
1265 return position;
1266 } else if (ch == '\n') {
1267 return position;
1268 } else {
1269 columnCurrent++;
1270 position = NextPosition(position, 1);
1274 return position;
1277 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1278 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1279 for (int line = lineBottom; line >= lineTop; line--) {
1280 int indentOfLine = GetLineIndentation(line);
1281 if (forwards) {
1282 if (LineStart(line) < LineEnd(line)) {
1283 SetLineIndentation(line, indentOfLine + IndentSize());
1285 } else {
1286 SetLineIndentation(line, indentOfLine - IndentSize());
1291 // Convert line endings for a piece of text to a particular mode.
1292 // Stop at len or when a NUL is found.
1293 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1294 std::string dest;
1295 for (size_t i = 0; (i < len) && (s[i]); i++) {
1296 if (s[i] == '\n' || s[i] == '\r') {
1297 if (eolModeWanted == SC_EOL_CR) {
1298 dest.push_back('\r');
1299 } else if (eolModeWanted == SC_EOL_LF) {
1300 dest.push_back('\n');
1301 } else { // eolModeWanted == SC_EOL_CRLF
1302 dest.push_back('\r');
1303 dest.push_back('\n');
1305 if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1306 i++;
1308 } else {
1309 dest.push_back(s[i]);
1312 return dest;
1315 void Document::ConvertLineEnds(int eolModeSet) {
1316 UndoGroup ug(this);
1318 for (int pos = 0; pos < Length(); pos++) {
1319 if (cb.CharAt(pos) == '\r') {
1320 if (cb.CharAt(pos + 1) == '\n') {
1321 // CRLF
1322 if (eolModeSet == SC_EOL_CR) {
1323 DeleteChars(pos + 1, 1); // Delete the LF
1324 } else if (eolModeSet == SC_EOL_LF) {
1325 DeleteChars(pos, 1); // Delete the CR
1326 } else {
1327 pos++;
1329 } else {
1330 // CR
1331 if (eolModeSet == SC_EOL_CRLF) {
1332 pos += InsertString(pos + 1, "\n", 1); // Insert LF
1333 } else if (eolModeSet == SC_EOL_LF) {
1334 pos += InsertString(pos, "\n", 1); // Insert LF
1335 DeleteChars(pos, 1); // Delete CR
1336 pos--;
1339 } else if (cb.CharAt(pos) == '\n') {
1340 // LF
1341 if (eolModeSet == SC_EOL_CRLF) {
1342 pos += InsertString(pos, "\r", 1); // Insert CR
1343 } else if (eolModeSet == SC_EOL_CR) {
1344 pos += InsertString(pos, "\r", 1); // Insert CR
1345 DeleteChars(pos, 1); // Delete LF
1346 pos--;
1353 bool Document::IsWhiteLine(int line) const {
1354 int currentChar = LineStart(line);
1355 int endLine = LineEnd(line);
1356 while (currentChar < endLine) {
1357 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1358 return false;
1360 ++currentChar;
1362 return true;
1365 int Document::ParaUp(int pos) const {
1366 int line = LineFromPosition(pos);
1367 line--;
1368 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1369 line--;
1371 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1372 line--;
1374 line++;
1375 return LineStart(line);
1378 int Document::ParaDown(int pos) const {
1379 int line = LineFromPosition(pos);
1380 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1381 line++;
1383 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1384 line++;
1386 if (line < LinesTotal())
1387 return LineStart(line);
1388 else // end of a document
1389 return LineEnd(line-1);
1392 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1393 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1394 return CharClassify::ccWord;
1395 return charClass.GetClass(ch);
1399 * Used by commands that want to select whole words.
1400 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1402 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1403 CharClassify::cc ccStart = CharClassify::ccWord;
1404 if (delta < 0) {
1405 if (!onlyWordCharacters)
1406 ccStart = WordCharClass(cb.CharAt(pos-1));
1407 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1408 pos--;
1409 } else {
1410 if (!onlyWordCharacters && pos < Length())
1411 ccStart = WordCharClass(cb.CharAt(pos));
1412 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1413 pos++;
1415 return MovePositionOutsideChar(pos, delta, true);
1419 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1420 * (delta < 0).
1421 * This is looking for a transition between character classes although there is also some
1422 * additional movement to transit white space.
1423 * Used by cursor movement by word commands.
1425 int Document::NextWordStart(int pos, int delta) {
1426 if (delta < 0) {
1427 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1428 pos--;
1429 if (pos > 0) {
1430 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1431 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1432 pos--;
1435 } else {
1436 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1437 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1438 pos++;
1439 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1440 pos++;
1442 return pos;
1446 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1447 * (delta < 0).
1448 * This is looking for a transition between character classes although there is also some
1449 * additional movement to transit white space.
1450 * Used by cursor movement by word commands.
1452 int Document::NextWordEnd(int pos, int delta) {
1453 if (delta < 0) {
1454 if (pos > 0) {
1455 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1456 if (ccStart != CharClassify::ccSpace) {
1457 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1458 pos--;
1461 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1462 pos--;
1465 } else {
1466 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1467 pos++;
1469 if (pos < Length()) {
1470 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1471 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1472 pos++;
1476 return pos;
1480 * Check that the character at the given position is a word or punctuation character and that
1481 * the previous character is of a different character class.
1483 bool Document::IsWordStartAt(int pos) const {
1484 if (pos > 0) {
1485 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1486 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1487 (ccPos != WordCharClass(CharAt(pos - 1)));
1489 return true;
1493 * Check that the character at the given position is a word or punctuation character and that
1494 * the next character is of a different character class.
1496 bool Document::IsWordEndAt(int pos) const {
1497 if (pos < Length()) {
1498 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1499 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1500 (ccPrev != WordCharClass(CharAt(pos)));
1502 return true;
1506 * Check that the given range has transitions between character classes at both
1507 * ends and where the characters on the inside are word or punctuation characters.
1509 bool Document::IsWordAt(int start, int end) const {
1510 return IsWordStartAt(start) && IsWordEndAt(end);
1513 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1514 return (!word && !wordStart) ||
1515 (word && IsWordAt(pos, pos + length)) ||
1516 (wordStart && IsWordStartAt(pos));
1519 bool Document::HasCaseFolder(void) const {
1520 return pcf != 0;
1523 void Document::SetCaseFolder(CaseFolder *pcf_) {
1524 delete pcf;
1525 pcf = pcf_;
1529 * Find text in document, supporting both forward and backward
1530 * searches (just pass minPos > maxPos to do a backward search)
1531 * Has not been tested with backwards DBCS searches yet.
// Search the document for `search` between minPos and maxPos.
// - A search is forward when minPos <= maxPos, otherwise backward.
// - *length is the byte length of `search` on entry; the case-insensitive
//   UTF-8 and DBCS branches overwrite it with the byte length of the match.
// - Returns the match position, minPos when *length <= 0, or -1 when nothing
//   is found. With regExp set, the call is delegated to the regex object.
// NOTE(review): the case-insensitive branches dereference pcf without a null
// check; presumably callers install a folder via SetCaseFolder first - confirm.
1533 long Document::FindText(int minPos, int maxPos, const char *search,
1534 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1535 int *length) {
1536 if (*length <= 0)
1537 return minPos;
1538 if (regExp) {
// The regular expression engine is created lazily on first use.
1539 if (!regex)
1540 regex = CreateRegexSearch(&charClass);
1541 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1542 } else {
1544 const bool forward = minPos <= maxPos;
1545 const int increment = forward ? 1 : -1;
1547 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1548 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1549 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1551 // Compute actual search ranges needed
1552 const int lengthFind = *length;
1554 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
// limitPos is the higher of the two ends; no match may extend past it.
1555 const int limitPos = Platform::Maximum(startPos, endPos);
1556 int pos = startPos;
1557 if (!forward) {
1558 // Back all of a character
1559 pos = NextPosition(pos, increment);
1561 if (caseSensitive) {
// Case sensitive: compare document bytes with the search bytes directly.
1562 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1563 const char charStartSearch = search[0];
1564 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
// Only compare the remaining bytes once the first byte matches.
1565 if (CharAt(pos) == charStartSearch) {
1566 bool found = (pos + lengthFind) <= limitPos;
1567 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1568 found = CharAt(pos + indexSearch) == search[indexSearch];
1570 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1571 return pos;
1574 if (!NextCharacter(pos, increment))
1575 break;
1577 } else if (SC_CP_UTF8 == dbcsCodePage) {
// Case insensitive, UTF-8: fold the search text once, then fold the document
// one character at a time and compare against the folded search bytes.
1578 const size_t maxFoldingExpansion = 4;
1579 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1580 const int lenSearch = static_cast<int>(
1581 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1582 char bytes[UTF8MaxBytes + 1];
1583 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1584 while (forward ? (pos < endPos) : (pos >= endPos)) {
1585 int widthFirstCharacter = 0;
1586 int posIndexDocument = pos;
1587 int indexSearch = 0;
1588 bool characterMatches = true;
1589 for (;;) {
// Gather the bytes of the document character at posIndexDocument.
1590 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1591 bytes[0] = leadByte;
1592 int widthChar = 1;
1593 if (!UTF8IsAscii(leadByte)) {
1594 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1595 for (int b=1; b<widthCharBytes; b++) {
1596 bytes[b] = cb.CharAt(posIndexDocument+b);
1598 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
// Remember the width of the first character so a forward search can step over it.
1600 if (!widthFirstCharacter)
1601 widthFirstCharacter = widthChar;
1602 if ((posIndexDocument + widthChar) > limitPos)
1603 break;
1604 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1605 folded[lenFlat] = 0;
1606 // Does folded match the buffer
1607 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1608 if (!characterMatches)
1609 break;
1610 posIndexDocument += widthChar;
1611 indexSearch += lenFlat;
1612 if (indexSearch >= lenSearch)
1613 break;
// A match needs every character to match and exactly lenSearch folded bytes consumed.
1615 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1616 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1617 *length = posIndexDocument - pos;
1618 return pos;
1621 if (forward) {
1622 pos += widthFirstCharacter;
1623 } else {
1624 if (!NextCharacter(pos, increment))
1625 break;
1628 } else if (dbcsCodePage) {
// Case insensitive, DBCS: same approach with characters of one or two bytes.
1629 const size_t maxBytesCharacter = 2;
1630 const size_t maxFoldingExpansion = 4;
1631 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1632 const int lenSearch = static_cast<int>(
1633 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1634 while (forward ? (pos < endPos) : (pos >= endPos)) {
1635 int indexDocument = 0;
1636 int indexSearch = 0;
1637 bool characterMatches = true;
1638 while (characterMatches &&
1639 ((pos + indexDocument) < limitPos) &&
1640 (indexSearch < lenSearch)) {
1641 char bytes[maxBytesCharacter + 1];
1642 bytes[0] = cb.CharAt(pos + indexDocument);
1643 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1644 if (widthChar == 2)
1645 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1646 if ((pos + indexDocument + widthChar) > limitPos)
1647 break;
1648 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1649 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1650 folded[lenFlat] = 0;
1651 // Does folded match the buffer
1652 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1653 indexDocument += widthChar;
1654 indexSearch += lenFlat;
1656 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1657 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1658 *length = indexDocument;
1659 return pos;
1662 if (!NextCharacter(pos, increment))
1663 break;
1665 } else {
// Case insensitive, single byte characters: fold one document byte at a time
// and compare it with the folded search text.
1666 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1667 std::vector<char> searchThing(lengthFind + 1);
1668 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1669 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1670 bool found = (pos + lengthFind) <= limitPos;
1671 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1672 char ch = CharAt(pos + indexSearch);
1673 char folded[2];
1674 pcf->Fold(folded, sizeof(folded), &ch, 1);
1675 found = folded[0] == searchThing[indexSearch];
1677 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1678 return pos;
1680 if (!NextCharacter(pos, increment))
1681 break;
1685 //Platform::DebugPrintf("Not found\n");
1686 return -1;
1689 const char *Document::SubstituteByPosition(const char *text, int *length) {
1690 if (regex)
1691 return regex->SubstituteByPosition(this, text, length);
1692 else
1693 return 0;
1696 int Document::LinesTotal() const {
1697 return cb.Lines();
1700 void Document::SetDefaultCharClasses(bool includeWordClass) {
1701 charClass.SetDefaultCharClasses(includeWordClass);
1704 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1705 charClass.SetCharClasses(chars, newCharClass);
1708 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1709 return charClass.GetCharsOfClass(characterClass, buffer);
1712 void SCI_METHOD Document::StartStyling(int position, char) {
1713 endStyled = position;
1716 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1717 if (enteredStyling != 0) {
1718 return false;
1719 } else {
1720 enteredStyling++;
1721 int prevEndStyled = endStyled;
1722 if (cb.SetStyleFor(endStyled, length, style)) {
1723 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1724 prevEndStyled, length);
1725 NotifyModified(mh);
1727 endStyled += length;
1728 enteredStyling--;
1729 return true;
1733 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1734 if (enteredStyling != 0) {
1735 return false;
1736 } else {
1737 enteredStyling++;
1738 bool didChange = false;
1739 int startMod = 0;
1740 int endMod = 0;
1741 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1742 PLATFORM_ASSERT(endStyled < Length());
1743 if (cb.SetStyleAt(endStyled, styles[iPos])) {
1744 if (!didChange) {
1745 startMod = endStyled;
1747 didChange = true;
1748 endMod = endStyled;
1751 if (didChange) {
1752 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1753 startMod, endMod - startMod + 1);
1754 NotifyModified(mh);
1756 enteredStyling--;
1757 return true;
1761 void Document::EnsureStyledTo(int pos) {
1762 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1763 IncrementStyleClock();
1764 if (pli && !pli->UseContainerLexing()) {
1765 int lineEndStyled = LineFromPosition(GetEndStyled());
1766 int endStyledTo = LineStart(lineEndStyled);
1767 pli->Colourise(endStyledTo, pos);
1768 } else {
1769 // Ask the watchers to style, and stop as soon as one responds.
1770 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1771 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1772 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1778 void Document::LexerChanged() {
1779 // Tell the watchers the lexer has changed.
1780 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1781 it->watcher->NotifyLexerChanged(this, it->userData);
1785 int SCI_METHOD Document::SetLineState(int line, int state) {
1786 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1787 if (state != statePrevious) {
1788 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1789 NotifyModified(mh);
1791 return statePrevious;
1794 int SCI_METHOD Document::GetLineState(int line) const {
1795 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1798 int Document::GetMaxLineState() {
1799 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1802 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1803 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1804 NotifyModified(mh);
1807 StyledText Document::MarginStyledText(int line) const {
1808 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1809 return StyledText(pla->Length(line), pla->Text(line),
1810 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1813 void Document::MarginSetText(int line, const char *text) {
1814 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1815 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1816 NotifyModified(mh);
1819 void Document::MarginSetStyle(int line, int style) {
1820 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1821 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1824 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1825 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1826 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1829 void Document::MarginClearAll() {
1830 int maxEditorLine = LinesTotal();
1831 for (int l=0; l<maxEditorLine; l++)
1832 MarginSetText(l, 0);
1833 // Free remaining data
1834 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1837 StyledText Document::AnnotationStyledText(int line) const {
1838 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1839 return StyledText(pla->Length(line), pla->Text(line),
1840 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1843 void Document::AnnotationSetText(int line, const char *text) {
1844 if (line >= 0 && line < LinesTotal()) {
1845 const int linesBefore = AnnotationLines(line);
1846 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1847 const int linesAfter = AnnotationLines(line);
1848 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1849 mh.annotationLinesAdded = linesAfter - linesBefore;
1850 NotifyModified(mh);
1854 void Document::AnnotationSetStyle(int line, int style) {
1855 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1856 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1857 NotifyModified(mh);
1860 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1861 if (line >= 0 && line < LinesTotal()) {
1862 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1866 int Document::AnnotationLines(int line) const {
1867 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1870 void Document::AnnotationClearAll() {
1871 int maxEditorLine = LinesTotal();
1872 for (int l=0; l<maxEditorLine; l++)
1873 AnnotationSetText(l, 0);
1874 // Free remaining data
1875 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1878 void Document::IncrementStyleClock() {
1879 styleClock = (styleClock + 1) % 0x100000;
1882 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1883 if (decorations.FillRange(position, value, fillLength)) {
1884 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1885 position, fillLength);
1886 NotifyModified(mh);
1890 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1891 WatcherWithUserData wwud(watcher, userData);
1892 std::vector<WatcherWithUserData>::iterator it =
1893 std::find(watchers.begin(), watchers.end(), wwud);
1894 if (it != watchers.end())
1895 return false;
1896 watchers.push_back(wwud);
1897 return true;
1900 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1901 std::vector<WatcherWithUserData>::iterator it =
1902 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1903 if (it != watchers.end()) {
1904 watchers.erase(it);
1905 return true;
1907 return false;
1910 void Document::NotifyModifyAttempt() {
1911 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1912 it->watcher->NotifyModifyAttempt(this, it->userData);
1916 void Document::NotifySavePoint(bool atSavePoint) {
1917 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1918 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1922 void Document::NotifyModified(DocModification mh) {
1923 if (mh.modificationType & SC_MOD_INSERTTEXT) {
1924 decorations.InsertSpace(mh.position, mh.length);
1925 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1926 decorations.DeleteRange(mh.position, mh.length);
1928 for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1929 it->watcher->NotifyModified(this, mh, it->userData);
1933 bool Document::IsWordPartSeparator(char ch) const {
1934 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1937 int Document::WordPartLeft(int pos) {
1938 if (pos > 0) {
1939 --pos;
1940 char startChar = cb.CharAt(pos);
1941 if (IsWordPartSeparator(startChar)) {
1942 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1943 --pos;
1946 if (pos > 0) {
1947 startChar = cb.CharAt(pos);
1948 --pos;
1949 if (IsLowerCase(startChar)) {
1950 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1951 --pos;
1952 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1953 ++pos;
1954 } else if (IsUpperCase(startChar)) {
1955 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1956 --pos;
1957 if (!IsUpperCase(cb.CharAt(pos)))
1958 ++pos;
1959 } else if (IsADigit(startChar)) {
1960 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1961 --pos;
1962 if (!IsADigit(cb.CharAt(pos)))
1963 ++pos;
1964 } else if (IsPunctuation(startChar)) {
1965 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1966 --pos;
1967 if (!IsPunctuation(cb.CharAt(pos)))
1968 ++pos;
1969 } else if (isspacechar(startChar)) {
1970 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1971 --pos;
1972 if (!isspacechar(cb.CharAt(pos)))
1973 ++pos;
1974 } else if (!IsASCII(startChar)) {
1975 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
1976 --pos;
1977 if (IsASCII(cb.CharAt(pos)))
1978 ++pos;
1979 } else {
1980 ++pos;
1984 return pos;
1987 int Document::WordPartRight(int pos) {
1988 char startChar = cb.CharAt(pos);
1989 int length = Length();
1990 if (IsWordPartSeparator(startChar)) {
1991 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1992 ++pos;
1993 startChar = cb.CharAt(pos);
1995 if (!IsASCII(startChar)) {
1996 while (pos < length && !IsASCII(cb.CharAt(pos)))
1997 ++pos;
1998 } else if (IsLowerCase(startChar)) {
1999 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2000 ++pos;
2001 } else if (IsUpperCase(startChar)) {
2002 if (IsLowerCase(cb.CharAt(pos + 1))) {
2003 ++pos;
2004 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2005 ++pos;
2006 } else {
2007 while (pos < length && IsUpperCase(cb.CharAt(pos)))
2008 ++pos;
2010 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2011 --pos;
2012 } else if (IsADigit(startChar)) {
2013 while (pos < length && IsADigit(cb.CharAt(pos)))
2014 ++pos;
2015 } else if (IsPunctuation(startChar)) {
2016 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2017 ++pos;
2018 } else if (isspacechar(startChar)) {
2019 while (pos < length && isspacechar(cb.CharAt(pos)))
2020 ++pos;
2021 } else {
2022 ++pos;
2024 return pos;
/// True for the two line end characters, LF and CR.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2031 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2032 int sStart = cb.StyleAt(pos);
2033 if (delta < 0) {
2034 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2035 pos--;
2036 pos++;
2037 } else {
2038 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2039 pos++;
2041 return pos;
/// Return the partner of a brace character from the pairs () [] {} <>, or '\0' for any other character.
static char BraceOpposite(char ch) {
	// Partners sit next to each other, so flipping the low index bit swaps them.
	static const char pairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
	const int pairCount = static_cast<int>(sizeof(pairs) / sizeof(pairs[0]));
	for (int i = 0; i < pairCount; i++) {
		if (pairs[i] == ch)
			return pairs[i ^ 1];
	}
	return '\0';
}
2067 // TODO: should be able to extend styled region to find matching brace
2068 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2069 char chBrace = CharAt(position);
2070 char chSeek = BraceOpposite(chBrace);
2071 if (chSeek == '\0')
2072 return - 1;
2073 char styBrace = static_cast<char>(StyleAt(position));
2074 int direction = -1;
2075 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2076 direction = 1;
2077 int depth = 1;
2078 position = NextPosition(position, direction);
2079 while ((position >= 0) && (position < Length())) {
2080 char chAtPos = CharAt(position);
2081 char styAtPos = static_cast<char>(StyleAt(position));
2082 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2083 if (chAtPos == chBrace)
2084 depth++;
2085 if (chAtPos == chSeek)
2086 depth--;
2087 if (depth == 0)
2088 return position;
2090 int positionBeforeMove = position;
2091 position = NextPosition(position, direction);
2092 if (position == positionBeforeMove)
2093 break;
2095 return - 1;
2099 * Implementation of RegexSearchBase for the default built-in regular expression engine
2101 class BuiltinRegex : public RegexSearchBase {
2102 public:
2103 explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2105 virtual ~BuiltinRegex() {
2108 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2109 bool caseSensitive, bool word, bool wordStart, int flags,
2110 int *length);
2112 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2114 private:
2115 RESearch search;
2116 std::string substituted;
2119 // Define a way for the Regular Expression code to access the document
2120 class DocumentIndexer : public CharacterIndexer {
2121 Document *pdoc;
2122 int end;
2123 public:
2124 DocumentIndexer(Document *pdoc_, int end_) :
2125 pdoc(pdoc_), end(end_) {
2128 virtual ~DocumentIndexer() {
2131 virtual char CharAt(int index) {
2132 if (index < 0 || index >= end)
2133 return 0;
2134 else
2135 return pdoc->CharAt(index);
2139 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2140 bool caseSensitive, bool, bool, int flags,
2141 int *length) {
2142 bool posix = (flags & SCFIND_POSIX) != 0;
2143 int increment = (minPos <= maxPos) ? 1 : -1;
2145 int startPos = minPos;
2146 int endPos = maxPos;
2148 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2149 startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2150 endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2152 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2153 if (errmsg) {
2154 return -1;
2156 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2157 // Replace first '.' with '-' in each property file variable reference:
2158 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2159 // Replace: $(\1-\2)
2160 int lineRangeStart = doc->LineFromPosition(startPos);
2161 int lineRangeEnd = doc->LineFromPosition(endPos);
2162 if ((increment == 1) &&
2163 (startPos >= doc->LineEnd(lineRangeStart)) &&
2164 (lineRangeStart < lineRangeEnd)) {
2165 // the start position is at end of line or between line end characters.
2166 lineRangeStart++;
2167 startPos = doc->LineStart(lineRangeStart);
2168 } else if ((increment == -1) &&
2169 (startPos <= doc->LineStart(lineRangeStart)) &&
2170 (lineRangeStart > lineRangeEnd)) {
2171 // the start position is at beginning of line.
2172 lineRangeStart--;
2173 startPos = doc->LineEnd(lineRangeStart);
2175 int pos = -1;
2176 int lenRet = 0;
2177 char searchEnd = s[*length - 1];
2178 char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2179 int lineRangeBreak = lineRangeEnd + increment;
2180 for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2181 int startOfLine = doc->LineStart(line);
2182 int endOfLine = doc->LineEnd(line);
2183 if (increment == 1) {
2184 if (line == lineRangeStart) {
2185 if ((startPos != startOfLine) && (s[0] == '^'))
2186 continue; // Can't match start of line if start position after start of line
2187 startOfLine = startPos;
2189 if (line == lineRangeEnd) {
2190 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2191 continue; // Can't match end of line if end position before end of line
2192 endOfLine = endPos;
2194 } else {
2195 if (line == lineRangeEnd) {
2196 if ((endPos != startOfLine) && (s[0] == '^'))
2197 continue; // Can't match start of line if end position after start of line
2198 startOfLine = endPos;
2200 if (line == lineRangeStart) {
2201 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2202 continue; // Can't match end of line if start position before end of line
2203 endOfLine = startPos;
2207 DocumentIndexer di(doc, endOfLine);
2208 int success = search.Execute(di, startOfLine, endOfLine);
2209 if (success) {
2210 pos = search.bopat[0];
2211 // Ensure only whole characters selected
2212 search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2213 lenRet = search.eopat[0] - search.bopat[0];
2214 // There can be only one start of a line, so no need to look for last match in line
2215 if ((increment == -1) && (s[0] != '^')) {
2216 // Check for the last match on this line.
2217 int repetitions = 1000; // Break out of infinite loop
2218 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2219 success = search.Execute(di, pos+1, endOfLine);
2220 if (success) {
2221 if (search.eopat[0] <= minPos) {
2222 pos = search.bopat[0];
2223 lenRet = search.eopat[0] - search.bopat[0];
2224 } else {
2225 success = 0;
2230 break;
2233 *length = lenRet;
2234 return pos;
2237 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2238 substituted.clear();
2239 DocumentIndexer di(doc, doc->Length());
2240 search.GrabMatches(di);
2241 for (int j = 0; j < *length; j++) {
2242 if (text[j] == '\\') {
2243 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2244 unsigned int patNum = text[j + 1] - '0';
2245 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2246 if (!search.pat[patNum].empty()) // Will be null if try for a match that did not occur
2247 substituted.append(search.pat[patNum].c_str(), len);
2248 j++;
2249 } else {
2250 j++;
2251 switch (text[j]) {
2252 case 'a':
2253 substituted.push_back('\a');
2254 break;
2255 case 'b':
2256 substituted.push_back('\b');
2257 break;
2258 case 'f':
2259 substituted.push_back('\f');
2260 break;
2261 case 'n':
2262 substituted.push_back('\n');
2263 break;
2264 case 'r':
2265 substituted.push_back('\r');
2266 break;
2267 case 't':
2268 substituted.push_back('\t');
2269 break;
2270 case 'v':
2271 substituted.push_back('\v');
2272 break;
2273 case '\\':
2274 substituted.push_back('\\');
2275 break;
2276 default:
2277 substituted.push_back('\\');
2278 j--;
2281 } else {
2282 substituted.push_back(text[j]);
2285 *length = static_cast<int>(substituted.length());
2286 return substituted.c_str();
2289 #ifndef SCI_OWNREGEX
2291 #ifdef SCI_NAMESPACE
2293 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2294 return new BuiltinRegex(charClassTable);
2297 #else
2299 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2300 return new BuiltinRegex(charClassTable);
2303 #endif
2305 #endif