Optimize %path% parser code
[TortoiseGit.git] / ext / scintilla / src / Document.cxx
blob9f9385e6e1ee6f95b7468838b962b5f0cf8f60b5
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <assert.h>
14 #include <string>
15 #include <vector>
17 #include "Platform.h"
19 #include "ILexer.h"
20 #include "Scintilla.h"
22 #include "SplitVector.h"
23 #include "Partitioning.h"
24 #include "RunStyles.h"
25 #include "CellBuffer.h"
26 #include "PerLine.h"
27 #include "CharClassify.h"
28 #include "CharacterSet.h"
29 #include "Decoration.h"
30 #include "Document.h"
31 #include "RESearch.h"
32 #include "UniConversion.h"
34 #ifdef SCI_NAMESPACE
35 using namespace Scintilla;
36 #endif
// Report whether ch is an ASCII punctuation character.
// Bytes outside the 7-bit ASCII range are never treated as punctuation.
static inline bool IsPunctuation(char ch) {
	// Classify the unsigned value: handing a negative char to ispunct is
	// undefined behaviour and isascii is a non-standard extension.
	const unsigned char uch = static_cast<unsigned char>(ch);
	return (uch < 0x80) && (ispunct(uch) != 0);
}
42 void LexInterface::Colourise(int start, int end) {
43 if (pdoc && instance && !performingStyle) {
44 // Protect against reentrance, which may occur, for example, when
45 // fold points are discovered while performing styling and the folding
46 // code looks for child lines which may trigger styling.
47 performingStyle = true;
49 int lengthDoc = pdoc->Length();
50 if (end == -1)
51 end = lengthDoc;
52 int len = end - start;
54 PLATFORM_ASSERT(len >= 0);
55 PLATFORM_ASSERT(start + len <= lengthDoc);
57 int styleStart = 0;
58 if (start > 0)
59 styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
61 if (len > 0) {
62 instance->Lex(start, len, styleStart, pdoc);
63 instance->Fold(start, len, styleStart, pdoc);
66 performingStyle = false;
70 Document::Document() {
71 refCount = 0;
72 #ifdef _WIN32
73 eolMode = SC_EOL_CRLF;
74 #else
75 eolMode = SC_EOL_LF;
76 #endif
77 dbcsCodePage = 0;
78 stylingBits = 5;
79 stylingBitsMask = 0x1F;
80 stylingMask = 0;
81 endStyled = 0;
82 styleClock = 0;
83 enteredModification = 0;
84 enteredStyling = 0;
85 enteredReadOnlyCount = 0;
86 tabInChars = 8;
87 indentInChars = 0;
88 actualIndentInChars = 8;
89 useTabs = true;
90 tabIndents = true;
91 backspaceUnindents = false;
92 watchers = 0;
93 lenWatchers = 0;
95 matchesValid = false;
96 regex = 0;
98 UTF8BytesOfLeadInitialise();
100 perLineData[ldMarkers] = new LineMarkers();
101 perLineData[ldLevels] = new LineLevels();
102 perLineData[ldState] = new LineState();
103 perLineData[ldMargin] = new LineAnnotation();
104 perLineData[ldAnnotation] = new LineAnnotation();
106 cb.SetPerLine(this);
108 pli = 0;
111 Document::~Document() {
112 for (int i = 0; i < lenWatchers; i++) {
113 watchers[i].watcher->NotifyDeleted(this, watchers[i].userData);
115 delete []watchers;
116 for (int j=0; j<ldSize; j++) {
117 delete perLineData[j];
118 perLineData[j] = 0;
120 watchers = 0;
121 lenWatchers = 0;
122 delete regex;
123 regex = 0;
124 delete pli;
125 pli = 0;
128 void Document::Init() {
129 for (int j=0; j<ldSize; j++) {
130 if (perLineData[j])
131 perLineData[j]->Init();
135 void Document::InsertLine(int line) {
136 for (int j=0; j<ldSize; j++) {
137 if (perLineData[j])
138 perLineData[j]->InsertLine(line);
142 void Document::RemoveLine(int line) {
143 for (int j=0; j<ldSize; j++) {
144 if (perLineData[j])
145 perLineData[j]->RemoveLine(line);
149 // Increase reference count and return its previous value.
150 int Document::AddRef() {
151 return refCount++;
154 // Decrease reference count and return its previous value.
155 // Delete the document if reference count reaches zero.
156 int SCI_METHOD Document::Release() {
157 int curRefCount = --refCount;
158 if (curRefCount == 0)
159 delete this;
160 return curRefCount;
163 void Document::SetSavePoint() {
164 cb.SetSavePoint();
165 NotifySavePoint(true);
168 int Document::GetMark(int line) {
169 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
172 int Document::MarkerNext(int lineStart, int mask) const {
173 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
176 int Document::AddMark(int line, int markerNum) {
177 if (line >= 0 && line <= LinesTotal()) {
178 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
179 AddMark(line, markerNum, LinesTotal());
180 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
181 NotifyModified(mh);
182 return prev;
183 } else {
184 return 0;
188 void Document::AddMarkSet(int line, int valueSet) {
189 if (line < 0 || line > LinesTotal()) {
190 return;
192 unsigned int m = valueSet;
193 for (int i = 0; m; i++, m >>= 1)
194 if (m & 1)
195 static_cast<LineMarkers *>(perLineData[ldMarkers])->
196 AddMark(line, i, LinesTotal());
197 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
198 NotifyModified(mh);
201 void Document::DeleteMark(int line, int markerNum) {
202 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
203 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
204 NotifyModified(mh);
207 void Document::DeleteMarkFromHandle(int markerHandle) {
208 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
209 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
210 mh.line = -1;
211 NotifyModified(mh);
214 void Document::DeleteAllMarks(int markerNum) {
215 bool someChanges = false;
216 for (int line = 0; line < LinesTotal(); line++) {
217 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
218 someChanges = true;
220 if (someChanges) {
221 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
222 mh.line = -1;
223 NotifyModified(mh);
227 int Document::LineFromHandle(int markerHandle) {
228 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
231 int SCI_METHOD Document::LineStart(int line) const {
232 return cb.LineStart(line);
235 int Document::LineEnd(int line) const {
236 if (line == LinesTotal() - 1) {
237 return LineStart(line + 1);
238 } else {
239 int position = LineStart(line + 1) - 1;
240 // When line terminator is CR+LF, may need to go back one more
241 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
242 position--;
244 return position;
248 void SCI_METHOD Document::SetErrorStatus(int status) {
249 // Tell the watchers the lexer has changed.
250 for (int i = 0; i < lenWatchers; i++) {
251 watchers[i].watcher->NotifyErrorOccurred(this, watchers[i].userData, status);
255 int SCI_METHOD Document::LineFromPosition(int pos) const {
256 return cb.LineFromPosition(pos);
259 int Document::LineEndPosition(int position) const {
260 return LineEnd(LineFromPosition(position));
263 bool Document::IsLineEndPosition(int position) const {
264 return LineEnd(LineFromPosition(position)) == position;
267 int Document::VCHomePosition(int position) const {
268 int line = LineFromPosition(position);
269 int startPosition = LineStart(line);
270 int endLine = LineEnd(line);
271 int startText = startPosition;
272 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
273 startText++;
274 if (position == startText)
275 return startPosition;
276 else
277 return startText;
280 int SCI_METHOD Document::SetLevel(int line, int level) {
281 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
282 if (prev != level) {
283 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
284 LineStart(line), 0, 0, 0, line);
285 mh.foldLevelNow = level;
286 mh.foldLevelPrev = prev;
287 NotifyModified(mh);
289 return prev;
292 int SCI_METHOD Document::GetLevel(int line) const {
293 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
296 void Document::ClearLevels() {
297 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
300 static bool IsSubordinate(int levelStart, int levelTry) {
301 if (levelTry & SC_FOLDLEVELWHITEFLAG)
302 return true;
303 else
304 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
307 int Document::GetLastChild(int lineParent, int level, int lastLine) {
308 if (level == -1)
309 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
310 int maxLine = LinesTotal();
311 int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
312 int lineMaxSubord = lineParent;
313 while (lineMaxSubord < maxLine - 1) {
314 EnsureStyledTo(LineStart(lineMaxSubord + 2));
315 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
316 break;
317 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
318 break;
319 lineMaxSubord++;
321 if (lineMaxSubord > lineParent) {
322 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
323 // Have chewed up some whitespace that belongs to a parent so seek back
324 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
325 lineMaxSubord--;
329 return lineMaxSubord;
332 int Document::GetFoldParent(int line) {
333 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
334 int lineLook = line - 1;
335 while ((lineLook > 0) && (
336 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
337 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
339 lineLook--;
341 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
342 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
343 return lineLook;
344 } else {
345 return -1;
// Work out the fold block that encloses `line` and the nearest lines around
// `line` where the fold structure changes, storing the four results
// (beginFoldBlock, endFoldBlock, firstChangeableLineBefore,
// firstChangeableLineAfter) in highlightDelimiter.
// When no enclosing fold header can be found, highlightDelimiter is cleared
// and nothing else is written.
// The larger of line and lastLine, plus one, is passed as the line limit to
// every GetLastChild call made here.
349 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
350 int level = GetLevel(line);
351 int lookLastLine = Platform::Maximum(line, lastLine) + 1;
// Step upwards while the current line is whitespace-flagged, or is a header
// whose level number is not below that of the line following it.
353 int lookLine = line;
354 int lookLineLevel = level;
355 int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
356 while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
357 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
358 lookLineLevel = GetLevel(--lookLine);
359 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// The block starts at the line reached if it is a header, otherwise at its
// fold parent (GetFoldParent returns -1 when there is none).
362 int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
363 if (beginFoldBlock == -1) {
364 highlightDelimiter.Clear();
365 return;
368 int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
369 int firstChangeableLineBefore = -1;
// The block found ends above `line`: scan further up for a header whose
// last child is exactly `line` and adopt that header as the block start.
370 if (endFoldBlock < line) {
371 lookLine = beginFoldBlock - 1;
372 lookLineLevel = GetLevel(lookLine);
373 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
374 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
375 if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
376 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
377 beginFoldBlock = lookLine;
378 endFoldBlock = line;
379 firstChangeableLineBefore = line - 1;
// Stop at a base-level line whose predecessor has a higher level number.
382 if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
383 break;
384 lookLineLevel = GetLevel(--lookLine);
385 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
// Otherwise look backwards from the line above `line`, down to the block
// start, for the first line that is whitespace-flagged or has a higher
// level number than `line`.
388 if (firstChangeableLineBefore == -1) {
389 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
390 lookLine >= beginFoldBlock;
391 lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
392 if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
393 firstChangeableLineBefore = lookLine;
394 break;
// Nothing found: fall back to the line just above the block.
398 if (firstChangeableLineBefore == -1)
399 firstChangeableLineBefore = beginFoldBlock - 1;
// Look forwards from the line below `line`, up to the block end, for the
// first header whose level number is below that of the line following it.
401 int firstChangeableLineAfter = -1;
402 for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
403 lookLine <= endFoldBlock;
404 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
405 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
406 firstChangeableLineAfter = lookLine;
407 break;
// Nothing found: fall back to the line just below the block.
410 if (firstChangeableLineAfter == -1)
411 firstChangeableLineAfter = endFoldBlock + 1;
413 highlightDelimiter.beginFoldBlock = beginFoldBlock;
414 highlightDelimiter.endFoldBlock = endFoldBlock;
415 highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
416 highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
419 int Document::ClampPositionIntoDocument(int pos) {
420 return Platform::Clamp(pos, 0, Length());
423 bool Document::IsCrLf(int pos) {
424 if (pos < 0)
425 return false;
426 if (pos >= (Length() - 1))
427 return false;
428 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
431 int Document::LenChar(int pos) {
432 if (pos < 0) {
433 return 1;
434 } else if (IsCrLf(pos)) {
435 return 2;
436 } else if (SC_CP_UTF8 == dbcsCodePage) {
437 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
438 const int widthCharBytes = UTF8BytesOfLead[leadByte];
439 int lengthDoc = Length();
440 if ((pos + widthCharBytes) > lengthDoc)
441 return lengthDoc - pos;
442 else
443 return widthCharBytes;
444 } else if (dbcsCodePage) {
445 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
446 } else {
447 return 1;
451 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
452 int trail = pos;
453 while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
454 trail--;
455 start = (trail > 0) ? trail-1 : trail;
457 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
458 const int widthCharBytes = UTF8BytesOfLead[leadByte];
459 if (widthCharBytes == 1) {
460 return false;
461 } else {
462 int trailBytes = widthCharBytes - 1;
463 int len = pos - start;
464 if (len > trailBytes)
465 // pos too far from lead
466 return false;
467 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
468 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
469 charBytes[b] = cb.CharAt(static_cast<int>(start+b));
470 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
471 if (utf8status & UTF8MaskInvalid)
472 return false;
473 end = start + widthCharBytes;
474 return true;
478 // Normalise a position so that it is not halfway through a two byte character.
479 // This can occur in two situations -
480 // When lines are terminated with \r\n pairs which should be treated as one character.
481 // When displaying DBCS text such as Japanese.
482 // If moving, move the position in the indicated direction.
// pos: position to normalise.
// moveDir: when pos is inside a character, > 0 returns the position after
// that character, otherwise the position at its start.
// checkLineEnd: when true, a position between the CR and LF of a pair is
// also moved out of the pair.
// Returns 0 for pos <= 0 and Length() for pos >= Length().
483 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
484 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
485 // If out of range, just return minimum/maximum value.
486 if (pos <= 0)
487 return 0;
488 if (pos >= Length())
489 return Length();
491 // PLATFORM_ASSERT(pos > 0 && pos < Length());
// A CR LF pair starting at pos - 1 means pos sits between the CR and the LF.
492 if (checkLineEnd && IsCrLf(pos - 1)) {
493 if (moveDir > 0)
494 return pos + 1;
495 else
496 return pos - 1;
// A code page of 0 skips all of the multi-byte handling below.
499 if (dbcsCodePage) {
500 if (SC_CP_UTF8 == dbcsCodePage) {
501 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
502 // If ch is not a trail byte then pos is valid intercharacter position
503 if (UTF8IsTrailByte(ch)) {
504 int startUTF = pos;
505 int endUTF = pos;
506 if (InGoodUTF8(pos, startUTF, endUTF)) {
507 // ch is a trail byte within a UTF-8 character
508 if (moveDir > 0)
509 pos = endUTF;
510 else
511 pos = startUTF;
513 // Else invalid UTF-8 so return position of isolated trail byte
515 } else {
516 // Anchor DBCS calculations at start of line because start of line can
517 // not be a DBCS trail byte.
518 int posStartLine = LineStart(LineFromPosition(pos));
519 if (pos == posStartLine)
520 return pos;
522 // Step back until a non-lead-byte is found.
523 int posCheck = pos;
524 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
525 posCheck--;
527 // Check from known start of character.
// Walk forward one character at a time: landing exactly on pos means pos is
// a boundary, overshooting means pos is inside the current character.
528 while (posCheck < pos) {
529 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
530 if (posCheck + mbsize == pos) {
531 return pos;
532 } else if (posCheck + mbsize > pos) {
533 if (moveDir > 0) {
534 return posCheck + mbsize;
535 } else {
536 return posCheck;
539 posCheck += mbsize;
544 return pos;
547 // NextPosition moves between valid positions - it can not handle a position in the middle of a
548 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
549 // A \r\n pair is treated as two characters.
// pos: a valid inter-character position.
// moveDir: > 0 steps forward by one character, anything else steps backward.
// Returns the new position; a step that would reach or pass either end of
// the document returns 0 or Length() respectively.
550 int Document::NextPosition(int pos, int moveDir) const {
551 // If out of range, just return minimum/maximum value.
552 int increment = (moveDir > 0) ? 1 : -1;
553 if (pos + increment <= 0)
554 return 0;
555 if (pos + increment >= Length())
556 return Length();
558 if (dbcsCodePage) {
559 if (SC_CP_UTF8 == dbcsCodePage) {
560 if (increment == 1) {
561 // Simple forward movement case so can avoid some checks
562 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
563 if (UTF8IsAscii(leadByte)) {
564 // Single byte character or invalid
565 pos++;
566 } else {
// Gather the bytes of the character and classify them; an invalid sequence
// advances by a single byte, a valid one by its classified width.
567 const int widthCharBytes = UTF8BytesOfLead[leadByte];
568 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
569 for (int b=1; b<widthCharBytes; b++)
570 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
571 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
572 if (utf8status & UTF8MaskInvalid)
573 pos++;
574 else
575 pos += utf8status & UTF8MaskWidth;
577 } else {
578 // Examine byte before position
579 pos--;
580 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
581 // If ch is not a trail byte then pos is valid intercharacter position
582 if (UTF8IsTrailByte(ch)) {
583 // If ch is a trail byte in a valid UTF-8 character then return start of character
584 int startUTF = pos;
585 int endUTF = pos;
586 if (InGoodUTF8(pos, startUTF, endUTF)) {
587 pos = startUTF;
589 // Else invalid UTF-8 so return position of isolated trail byte
592 } else {
// DBCS code pages.
593 if (moveDir > 0) {
594 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
595 pos += mbsize;
596 if (pos > Length())
597 pos = Length();
598 } else {
599 // Anchor DBCS calculations at start of line because start of line can
600 // not be a DBCS trail byte.
601 int posStartLine = LineStart(LineFromPosition(pos));
602 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
603 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
604 if ((pos - 1) <= posStartLine) {
605 return pos - 1;
606 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
607 // Must actually be trail byte
608 return pos - 2;
609 } else {
610 // Otherwise, step back until a non-lead-byte is found.
611 int posTemp = pos - 1;
// NOTE(review): file line 613 is absent from this listing, so the loop below
// has no visible body; it presumably was an empty statement (";") - confirm
// against the original file.
612 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
614 // Now posTemp+1 must point to the beginning of a character,
615 // so figure out whether we went back an even or an odd
616 // number of bytes and go back 1 or 2 bytes, respectively.
617 return (pos - 1 - ((pos - posTemp) & 1));
621 } else {
// Single-byte encoding: every byte is a character.
622 pos += increment;
625 return pos;
628 bool Document::NextCharacter(int &pos, int moveDir) {
629 // Returns true if pos changed
630 int posNext = NextPosition(pos, moveDir);
631 if (posNext == pos) {
632 return false;
633 } else {
634 pos = posNext;
635 return true;
639 int SCI_METHOD Document::CodePage() const {
640 return dbcsCodePage;
643 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
644 // Byte ranges found in Wikipedia articles with relevant search strings in each case
645 unsigned char uch = static_cast<unsigned char>(ch);
646 switch (dbcsCodePage) {
647 case 932:
648 // Shift_jis
649 return ((uch >= 0x81) && (uch <= 0x9F)) ||
650 ((uch >= 0xE0) && (uch <= 0xFC));
651 // Lead bytes F0 to FC may be a Microsoft addition.
652 case 936:
653 // GBK
654 return (uch >= 0x81) && (uch <= 0xFE);
655 case 949:
656 // Korean Wansung KS C-5601-1987
657 return (uch >= 0x81) && (uch <= 0xFE);
658 case 950:
659 // Big5
660 return (uch >= 0x81) && (uch <= 0xFE);
661 case 1361:
662 // Korean Johab KS C-5601-1992
663 return
664 ((uch >= 0x84) && (uch <= 0xD3)) ||
665 ((uch >= 0xD8) && (uch <= 0xDE)) ||
666 ((uch >= 0xE0) && (uch <= 0xF9));
668 return false;
// True for the two horizontal whitespace characters: space and tab.
static inline bool IsSpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
675 // Need to break text into segments near lengthSegment but taking into
676 // account the encoding to not break inside a UTF-8 or DBCS character
677 // and also trying to avoid breaking inside a pair of combining characters.
678 // The segment length must always be long enough (more than 4 bytes)
679 // so that there will be at least one whole character to make a segment.
680 // For UTF-8, text must consist only of valid whole characters.
681 // In preference order from best to worst:
682 // 1) Break after space
683 // 2) Break before punctuation
684 // 3) Break after whole character
686 int Document::SafeSegment(const char *text, int length, int lengthSegment) {
687 if (length <= lengthSegment)
688 return length;
689 int lastSpaceBreak = -1;
690 int lastPunctuationBreak = -1;
691 int lastEncodingAllowedBreak = -1;
692 for (int j=0; j < lengthSegment;) {
693 unsigned char ch = static_cast<unsigned char>(text[j]);
694 if (j > 0) {
695 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
696 lastSpaceBreak = j;
698 if (ch < 'A') {
699 lastPunctuationBreak = j;
702 lastEncodingAllowedBreak = j;
704 if (dbcsCodePage == SC_CP_UTF8) {
705 j += UTF8BytesOfLead[ch];
706 } else if (dbcsCodePage) {
707 j += IsDBCSLeadByte(ch) ? 2 : 1;
708 } else {
709 j++;
712 if (lastSpaceBreak >= 0) {
713 return lastSpaceBreak;
714 } else if (lastPunctuationBreak >= 0) {
715 return lastPunctuationBreak;
717 return lastEncodingAllowedBreak;
720 void Document::ModifiedAt(int pos) {
721 if (endStyled > pos)
722 endStyled = pos;
725 void Document::CheckReadOnly() {
726 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
727 enteredReadOnlyCount++;
728 NotifyModifyAttempt();
729 enteredReadOnlyCount--;
733 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
734 // SetStyleAt does not change the persistent state of a document
736 bool Document::DeleteChars(int pos, int len) {
737 if (len <= 0)
738 return false;
739 if ((pos + len) > Length())
740 return false;
741 CheckReadOnly();
742 if (enteredModification != 0) {
743 return false;
744 } else {
745 enteredModification++;
746 if (!cb.IsReadOnly()) {
747 NotifyModified(
748 DocModification(
749 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
750 pos, len,
751 0, 0));
752 int prevLinesTotal = LinesTotal();
753 bool startSavePoint = cb.IsSavePoint();
754 bool startSequence = false;
755 const char *text = cb.DeleteChars(pos, len, startSequence);
756 if (startSavePoint && cb.IsCollectingUndo())
757 NotifySavePoint(!startSavePoint);
758 if ((pos < Length()) || (pos == 0))
759 ModifiedAt(pos);
760 else
761 ModifiedAt(pos-1);
762 NotifyModified(
763 DocModification(
764 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
765 pos, len,
766 LinesTotal() - prevLinesTotal, text));
768 enteredModification--;
770 return !cb.IsReadOnly();
774 * Insert a string with a length.
776 bool Document::InsertString(int position, const char *s, int insertLength) {
777 if (insertLength <= 0) {
778 return false;
780 CheckReadOnly();
781 if (enteredModification != 0) {
782 return false;
783 } else {
784 enteredModification++;
785 if (!cb.IsReadOnly()) {
786 NotifyModified(
787 DocModification(
788 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
789 position, insertLength,
790 0, s));
791 int prevLinesTotal = LinesTotal();
792 bool startSavePoint = cb.IsSavePoint();
793 bool startSequence = false;
794 const char *text = cb.InsertString(position, s, insertLength, startSequence);
795 if (startSavePoint && cb.IsCollectingUndo())
796 NotifySavePoint(!startSavePoint);
797 ModifiedAt(position);
798 NotifyModified(
799 DocModification(
800 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
801 position, insertLength,
802 LinesTotal() - prevLinesTotal, text));
804 enteredModification--;
806 return !cb.IsReadOnly();
809 int SCI_METHOD Document::AddData(char *data, int length) {
810 try {
811 int position = Length();
812 InsertString(position,data, length);
813 } catch (std::bad_alloc &) {
814 return SC_STATUS_BADALLOC;
815 } catch (...) {
816 return SC_STATUS_FAILURE;
818 return 0;
821 void * SCI_METHOD Document::ConvertToDocument() {
822 return this;
// Undo the steps reported by cb.StartUndo(), sending a "before" and an
// "after" notification to watchers for each step.
// Returns -1 if no text action was undone; otherwise a position derived from
// the last text action undone: the action position, moved to the end of the
// reinserted text when the action was a removal.
// Nothing is undone when called from inside another modification or when the
// document is read-only.
825 int Document::Undo() {
826 int newPos = -1;
827 CheckReadOnly();
828 if (enteredModification == 0) {
829 enteredModification++;
830 if (!cb.IsReadOnly()) {
831 bool startSavePoint = cb.IsSavePoint();
832 bool multiLine = false;
833 int steps = cb.StartUndo();
834 //Platform::DebugPrintf("Steps=%d\n", steps);
// State used to merge runs of adjacent removal actions so that newPos ends
// up after the whole reinserted run rather than after the last piece.
835 int coalescedRemovePos = -1;
836 int coalescedRemoveLen = 0;
837 int prevRemoveActionPos = -1;
838 int prevRemoveActionLen = 0;
839 for (int step = 0; step < steps; step++) {
840 const int prevLinesTotal = LinesTotal();
841 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal is announced as an insertion,
// a container action as SC_MOD_CONTAINER, anything else as a deletion.
842 if (action.at == removeAction) {
843 NotifyModified(DocModification(
844 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
845 } else if (action.at == containerAction) {
846 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
847 dm.token = action.position;
848 NotifyModified(dm);
// A container action that may not coalesce resets the removal-run state.
849 if (!action.mayCoalesce) {
850 coalescedRemovePos = -1;
851 coalescedRemoveLen = 0;
852 prevRemoveActionPos = -1;
853 prevRemoveActionLen = 0;
855 } else {
856 NotifyModified(DocModification(
857 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
859 cb.PerformUndoStep();
860 if (action.at != containerAction) {
861 ModifiedAt(action.position);
862 newPos = action.position;
865 int modFlags = SC_PERFORMED_UNDO;
866 // With undo, an insertion action becomes a deletion notification
867 if (action.at == removeAction) {
868 newPos += action.lenData;
869 modFlags |= SC_MOD_INSERTTEXT;
// Extend the current run when this removal starts at the previous removal's
// position or at its end; otherwise start a new run.
870 if ((coalescedRemoveLen > 0) &&
871 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
872 coalescedRemoveLen += action.lenData;
873 newPos = coalescedRemovePos + coalescedRemoveLen;
874 } else {
875 coalescedRemovePos = action.position;
876 coalescedRemoveLen = action.lenData;
878 prevRemoveActionPos = action.position;
879 prevRemoveActionLen = action.lenData;
880 } else if (action.at == insertAction) {
881 modFlags |= SC_MOD_DELETETEXT;
882 coalescedRemovePos = -1;
883 coalescedRemoveLen = 0;
884 prevRemoveActionPos = -1;
885 prevRemoveActionLen = 0;
887 if (steps > 1)
888 modFlags |= SC_MULTISTEPUNDOREDO;
889 const int linesAdded = LinesTotal() - prevLinesTotal;
890 if (linesAdded != 0)
891 multiLine = true;
// The final step is flagged, and also reports whether any step in the group
// changed the line count.
892 if (step == steps - 1) {
893 modFlags |= SC_LASTSTEPINUNDOREDO;
894 if (multiLine)
895 modFlags |= SC_MULTILINEUNDOREDO;
897 NotifyModified(DocModification(modFlags, action.position, action.lenData,
898 linesAdded, action.data));
// Report a save point transition only if the state actually changed.
901 bool endSavePoint = cb.IsSavePoint();
902 if (startSavePoint != endSavePoint)
903 NotifySavePoint(endSavePoint);
905 enteredModification--;
907 return newPos;
// Redo the steps reported by cb.StartRedo(), sending a "before" and an
// "after" notification to watchers for each step.
// Returns -1 if no text action was redone; otherwise the position of the
// last text action redone, moved past the inserted text when that action was
// an insertion.
// Nothing is redone when called from inside another modification or when the
// document is read-only.
910 int Document::Redo() {
911 int newPos = -1;
912 CheckReadOnly();
913 if (enteredModification == 0) {
914 enteredModification++;
915 if (!cb.IsReadOnly()) {
916 bool startSavePoint = cb.IsSavePoint();
917 bool multiLine = false;
918 int steps = cb.StartRedo();
919 for (int step = 0; step < steps; step++) {
920 const int prevLinesTotal = LinesTotal();
921 const Action &action = cb.GetRedoStep();
// "Before" notification: an insertion is announced as such, a container
// action as SC_MOD_CONTAINER, anything else as a deletion.
922 if (action.at == insertAction) {
923 NotifyModified(DocModification(
924 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
925 } else if (action.at == containerAction) {
926 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
927 dm.token = action.position;
928 NotifyModified(dm);
929 } else {
930 NotifyModified(DocModification(
931 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
933 cb.PerformRedoStep();
934 if (action.at != containerAction) {
935 ModifiedAt(action.position);
936 newPos = action.position;
939 int modFlags = SC_PERFORMED_REDO;
940 if (action.at == insertAction) {
941 newPos += action.lenData;
942 modFlags |= SC_MOD_INSERTTEXT;
943 } else if (action.at == removeAction) {
944 modFlags |= SC_MOD_DELETETEXT;
946 if (steps > 1)
947 modFlags |= SC_MULTISTEPUNDOREDO;
948 const int linesAdded = LinesTotal() - prevLinesTotal;
949 if (linesAdded != 0)
950 multiLine = true;
// The final step is flagged, and also reports whether any step in the group
// changed the line count.
951 if (step == steps - 1) {
952 modFlags |= SC_LASTSTEPINUNDOREDO;
953 if (multiLine)
954 modFlags |= SC_MULTILINEUNDOREDO;
956 NotifyModified(
957 DocModification(modFlags, action.position, action.lenData,
958 linesAdded, action.data));
// Report a save point transition only if the state actually changed.
961 bool endSavePoint = cb.IsSavePoint();
962 if (startSavePoint != endSavePoint)
963 NotifySavePoint(endSavePoint);
965 enteredModification--;
967 return newPos;
971 * Insert a single character.
973 bool Document::InsertChar(int pos, char ch) {
974 char chs[1];
975 chs[0] = ch;
976 return InsertString(pos, chs, 1);
980 * Insert a null terminated string.
982 bool Document::InsertCString(int position, const char *s) {
983 return InsertString(position, s, static_cast<int>(s ? strlen(s) : 0));
986 void Document::ChangeChar(int pos, char ch) {
987 DeleteChars(pos, 1);
988 InsertChar(pos, ch);
991 void Document::DelChar(int pos) {
992 DeleteChars(pos, LenChar(pos));
995 void Document::DelCharBack(int pos) {
996 if (pos <= 0) {
997 return;
998 } else if (IsCrLf(pos - 2)) {
999 DeleteChars(pos - 2, 2);
1000 } else if (dbcsCodePage) {
1001 int startChar = NextPosition(pos, -1);
1002 DeleteChars(startChar, pos - startChar);
1003 } else {
1004 DeleteChars(pos - 1, 1);
// Return the column of the first tab stop strictly after column pos, with
// tab stops placed every tabSize columns.
// A non-positive tabSize is treated as 1 so the division below can never be
// by zero.
static int NextTab(int pos, int tabSize) {
	if (tabSize <= 0)
		tabSize = 1;
	return ((pos / tabSize) + 1) * tabSize;
}
// Build the whitespace that produces an indentation of indent columns.
// When insertSpaces is false, as many tabs (each worth tabSize columns) as
// fit are used and the remainder is padded with spaces; otherwise only
// spaces are used. A non-positive indent yields an empty string.
// A non-positive tabSize cannot be expressed in tabs, so spaces are used
// instead of looping without end.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (indent <= 0)
		return indentation;
	if (!insertSpaces && (tabSize > 0)) {
		indentation.append(static_cast<std::string::size_type>(indent / tabSize), '\t');
		indent %= tabSize;
	}
	indentation.append(static_cast<std::string::size_type>(indent), ' ');
	return indentation;
}
1027 int SCI_METHOD Document::GetLineIndentation(int line) {
1028 int indent = 0;
1029 if ((line >= 0) && (line < LinesTotal())) {
1030 int lineStart = LineStart(line);
1031 int length = Length();
1032 for (int i = lineStart; i < length; i++) {
1033 char ch = cb.CharAt(i);
1034 if (ch == ' ')
1035 indent++;
1036 else if (ch == '\t')
1037 indent = NextTab(indent, tabInChars);
1038 else
1039 return indent;
1042 return indent;
1045 void Document::SetLineIndentation(int line, int indent) {
1046 int indentOfLine = GetLineIndentation(line);
1047 if (indent < 0)
1048 indent = 0;
1049 if (indent != indentOfLine) {
1050 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1051 int thisLineStart = LineStart(line);
1052 int indentPos = GetLineIndentPosition(line);
1053 UndoGroup ug(this);
1054 DeleteChars(thisLineStart, indentPos - thisLineStart);
1055 InsertCString(thisLineStart, linebuf.c_str());
1059 int Document::GetLineIndentPosition(int line) const {
1060 if (line < 0)
1061 return 0;
1062 int pos = LineStart(line);
1063 int length = Length();
1064 while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1065 pos++;
1067 return pos;
1070 int Document::GetColumn(int pos) {
1071 int column = 0;
1072 int line = LineFromPosition(pos);
1073 if ((line >= 0) && (line < LinesTotal())) {
1074 for (int i = LineStart(line); i < pos;) {
1075 char ch = cb.CharAt(i);
1076 if (ch == '\t') {
1077 column = NextTab(column, tabInChars);
1078 i++;
1079 } else if (ch == '\r') {
1080 return column;
1081 } else if (ch == '\n') {
1082 return column;
1083 } else if (i >= Length()) {
1084 return column;
1085 } else {
1086 column++;
1087 i = NextPosition(i, 1);
1091 return column;
1094 int Document::CountCharacters(int startPos, int endPos) {
1095 startPos = MovePositionOutsideChar(startPos, 1, false);
1096 endPos = MovePositionOutsideChar(endPos, -1, false);
1097 int count = 0;
1098 int i = startPos;
1099 while (i < endPos) {
1100 count++;
1101 if (IsCrLf(i))
1102 i++;
1103 i = NextPosition(i, 1);
1105 return count;
1108 int Document::FindColumn(int line, int column) {
1109 int position = LineStart(line);
1110 if ((line >= 0) && (line < LinesTotal())) {
1111 int columnCurrent = 0;
1112 while ((columnCurrent < column) && (position < Length())) {
1113 char ch = cb.CharAt(position);
1114 if (ch == '\t') {
1115 columnCurrent = NextTab(columnCurrent, tabInChars);
1116 if (columnCurrent > column)
1117 return position;
1118 position++;
1119 } else if (ch == '\r') {
1120 return position;
1121 } else if (ch == '\n') {
1122 return position;
1123 } else {
1124 columnCurrent++;
1125 position = NextPosition(position, 1);
1129 return position;
1132 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1133 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1134 for (int line = lineBottom; line >= lineTop; line--) {
1135 int indentOfLine = GetLineIndentation(line);
1136 if (forwards) {
1137 if (LineStart(line) < LineEnd(line)) {
1138 SetLineIndentation(line, indentOfLine + IndentSize());
1140 } else {
1141 SetLineIndentation(line, indentOfLine - IndentSize());
1146 // Convert line endings for a piece of text to a particular mode.
1147 // Stop at len or when a NUL is found.
1148 // Caller must delete the returned pointer.
1149 char *Document::TransformLineEnds(int *pLenOut, const char *s, size_t len, int eolModeWanted) {
1150 char *dest = new char[2 * len + 1];
1151 const char *sptr = s;
1152 char *dptr = dest;
1153 for (size_t i = 0; (i < len) && (*sptr != '\0'); i++) {
1154 if (*sptr == '\n' || *sptr == '\r') {
1155 if (eolModeWanted == SC_EOL_CR) {
1156 *dptr++ = '\r';
1157 } else if (eolModeWanted == SC_EOL_LF) {
1158 *dptr++ = '\n';
1159 } else { // eolModeWanted == SC_EOL_CRLF
1160 *dptr++ = '\r';
1161 *dptr++ = '\n';
1163 if ((*sptr == '\r') && (i+1 < len) && (*(sptr+1) == '\n')) {
1164 i++;
1165 sptr++;
1167 sptr++;
1168 } else {
1169 *dptr++ = *sptr++;
1172 *dptr++ = '\0';
1173 *pLenOut = (dptr - dest) - 1;
1174 return dest;
1177 void Document::ConvertLineEnds(int eolModeSet) {
1178 UndoGroup ug(this);
1180 for (int pos = 0; pos < Length(); pos++) {
1181 if (cb.CharAt(pos) == '\r') {
1182 if (cb.CharAt(pos + 1) == '\n') {
1183 // CRLF
1184 if (eolModeSet == SC_EOL_CR) {
1185 DeleteChars(pos + 1, 1); // Delete the LF
1186 } else if (eolModeSet == SC_EOL_LF) {
1187 DeleteChars(pos, 1); // Delete the CR
1188 } else {
1189 pos++;
1191 } else {
1192 // CR
1193 if (eolModeSet == SC_EOL_CRLF) {
1194 InsertString(pos + 1, "\n", 1); // Insert LF
1195 pos++;
1196 } else if (eolModeSet == SC_EOL_LF) {
1197 InsertString(pos, "\n", 1); // Insert LF
1198 DeleteChars(pos + 1, 1); // Delete CR
1201 } else if (cb.CharAt(pos) == '\n') {
1202 // LF
1203 if (eolModeSet == SC_EOL_CRLF) {
1204 InsertString(pos, "\r", 1); // Insert CR
1205 pos++;
1206 } else if (eolModeSet == SC_EOL_CR) {
1207 InsertString(pos, "\r", 1); // Insert CR
1208 DeleteChars(pos + 1, 1); // Delete LF
1215 bool Document::IsWhiteLine(int line) const {
1216 int currentChar = LineStart(line);
1217 int endLine = LineEnd(line);
1218 while (currentChar < endLine) {
1219 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1220 return false;
1222 ++currentChar;
1224 return true;
1227 int Document::ParaUp(int pos) {
1228 int line = LineFromPosition(pos);
1229 line--;
1230 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1231 line--;
1233 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1234 line--;
1236 line++;
1237 return LineStart(line);
1240 int Document::ParaDown(int pos) {
1241 int line = LineFromPosition(pos);
1242 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1243 line++;
1245 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1246 line++;
1248 if (line < LinesTotal())
1249 return LineStart(line);
1250 else // end of a document
1251 return LineEnd(line-1);
1254 CharClassify::cc Document::WordCharClass(unsigned char ch) {
1255 if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1256 return CharClassify::ccWord;
1257 return charClass.GetClass(ch);
1261 * Used by commmands that want to select whole words.
1262 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1264 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1265 CharClassify::cc ccStart = CharClassify::ccWord;
1266 if (delta < 0) {
1267 if (!onlyWordCharacters)
1268 ccStart = WordCharClass(cb.CharAt(pos-1));
1269 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1270 pos--;
1271 } else {
1272 if (!onlyWordCharacters && pos < Length())
1273 ccStart = WordCharClass(cb.CharAt(pos));
1274 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1275 pos++;
1277 return MovePositionOutsideChar(pos, delta, true);
1281 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1282 * (delta < 0).
1283 * This is looking for a transition between character classes although there is also some
1284 * additional movement to transit white space.
1285 * Used by cursor movement by word commands.
1287 int Document::NextWordStart(int pos, int delta) {
1288 if (delta < 0) {
1289 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1290 pos--;
1291 if (pos > 0) {
1292 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1293 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1294 pos--;
1297 } else {
1298 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1299 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1300 pos++;
1301 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1302 pos++;
1304 return pos;
1308 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1309 * (delta < 0).
1310 * This is looking for a transition between character classes although there is also some
1311 * additional movement to transit white space.
1312 * Used by cursor movement by word commands.
1314 int Document::NextWordEnd(int pos, int delta) {
1315 if (delta < 0) {
1316 if (pos > 0) {
1317 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1318 if (ccStart != CharClassify::ccSpace) {
1319 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1320 pos--;
1323 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1324 pos--;
1327 } else {
1328 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1329 pos++;
1331 if (pos < Length()) {
1332 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1333 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1334 pos++;
1338 return pos;
1342 * Check that the character at the given position is a word or punctuation character and that
1343 * the previous character is of a different character class.
1345 bool Document::IsWordStartAt(int pos) {
1346 if (pos > 0) {
1347 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1348 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1349 (ccPos != WordCharClass(CharAt(pos - 1)));
1351 return true;
1355 * Check that the character at the given position is a word or punctuation character and that
1356 * the next character is of a different character class.
1358 bool Document::IsWordEndAt(int pos) {
1359 if (pos < Length()) {
1360 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1361 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1362 (ccPrev != WordCharClass(CharAt(pos)));
1364 return true;
1368 * Check that the given range is has transitions between character classes at both
1369 * ends and where the characters on the inside are word or punctuation characters.
1371 bool Document::IsWordAt(int start, int end) {
1372 return IsWordStartAt(start) && IsWordEndAt(end);
// Lower-case an ASCII upper case letter ('A'..'Z'); any other value is returned unchanged.
static inline char MakeLowerCase(char ch) {
    const bool isUpper = (ch >= 'A') && (ch <= 'Z');
    return isUpper ? static_cast<char>(ch - 'A' + 'a') : ch;
}
1382 CaseFolderTable::CaseFolderTable() {
1383 for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1384 mapping[iChar] = static_cast<char>(iChar);
1388 CaseFolderTable::~CaseFolderTable() {
1391 size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
1392 if (lenMixed > sizeFolded) {
1393 return 0;
1394 } else {
1395 for (size_t i=0; i<lenMixed; i++) {
1396 folded[i] = mapping[static_cast<unsigned char>(mixed[i])];
1398 return lenMixed;
1402 void CaseFolderTable::SetTranslation(char ch, char chTranslation) {
1403 mapping[static_cast<unsigned char>(ch)] = chTranslation;
1406 void CaseFolderTable::StandardASCII() {
1407 for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1408 if (iChar >= 'A' && iChar <= 'Z') {
1409 mapping[iChar] = static_cast<char>(iChar - 'A' + 'a');
1410 } else {
1411 mapping[iChar] = static_cast<char>(iChar);
1416 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) {
1417 return (!word && !wordStart) ||
1418 (word && IsWordAt(pos, pos + length)) ||
1419 (wordStart && IsWordStartAt(pos));
1423 * Find text in document, supporting both forward and backward
1424 * searches (just pass minPos > maxPos to do a backward search)
1425 * Has not been tested with backwards DBCS searches yet.
1427 long Document::FindText(int minPos, int maxPos, const char *search,
1428 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1429 int *length, CaseFolder *pcf) {
1430 if (*length <= 0)
1431 return minPos;
1432 if (regExp) {
1433 if (!regex)
1434 regex = CreateRegexSearch(&charClass);
1435 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1436 } else {
1438 const bool forward = minPos <= maxPos;
1439 const int increment = forward ? 1 : -1;
1441 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1442 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1443 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1445 // Compute actual search ranges needed
1446 const int lengthFind = *length;
1448 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1449 const int limitPos = Platform::Maximum(startPos, endPos);
1450 int pos = startPos;
1451 if (!forward) {
1452 // Back all of a character
1453 pos = NextPosition(pos, increment);
1455 if (caseSensitive) {
1456 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1457 const char charStartSearch = search[0];
1458 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1459 if (CharAt(pos) == charStartSearch) {
1460 bool found = (pos + lengthFind) <= limitPos;
1461 for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1462 found = CharAt(pos + indexSearch) == search[indexSearch];
1464 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1465 return pos;
1468 if (!NextCharacter(pos, increment))
1469 break;
1471 } else if (SC_CP_UTF8 == dbcsCodePage) {
1472 const size_t maxFoldingExpansion = 4;
1473 std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1474 const int lenSearch = static_cast<int>(
1475 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1476 char bytes[UTF8MaxBytes + 1];
1477 char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1478 while (forward ? (pos < endPos) : (pos >= endPos)) {
1479 int widthFirstCharacter = 0;
1480 int posIndexDocument = pos;
1481 int indexSearch = 0;
1482 bool characterMatches = true;
1483 for (;;) {
1484 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1485 bytes[0] = leadByte;
1486 int widthChar = 1;
1487 if (!UTF8IsAscii(leadByte)) {
1488 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1489 for (int b=1; b<widthCharBytes; b++) {
1490 bytes[b] = cb.CharAt(posIndexDocument+b);
1492 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1494 if (!widthFirstCharacter)
1495 widthFirstCharacter = widthChar;
1496 if ((posIndexDocument + widthChar) > limitPos)
1497 break;
1498 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1499 folded[lenFlat] = 0;
1500 // Does folded match the buffer
1501 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1502 if (!characterMatches)
1503 break;
1504 posIndexDocument += widthChar;
1505 indexSearch += lenFlat;
1506 if (indexSearch >= lenSearch)
1507 break;
1509 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1510 if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1511 *length = posIndexDocument - pos;
1512 return pos;
1515 if (forward) {
1516 pos += widthFirstCharacter;
1517 } else {
1518 if (!NextCharacter(pos, increment))
1519 break;
1522 } else if (dbcsCodePage) {
1523 const size_t maxBytesCharacter = 2;
1524 const size_t maxFoldingExpansion = 4;
1525 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1526 const int lenSearch = static_cast<int>(
1527 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1528 while (forward ? (pos < endPos) : (pos >= endPos)) {
1529 int indexDocument = 0;
1530 int indexSearch = 0;
1531 bool characterMatches = true;
1532 while (characterMatches &&
1533 ((pos + indexDocument) < limitPos) &&
1534 (indexSearch < lenSearch)) {
1535 char bytes[maxBytesCharacter + 1];
1536 bytes[0] = cb.CharAt(pos + indexDocument);
1537 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1538 if (widthChar == 2)
1539 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1540 if ((pos + indexDocument + widthChar) > limitPos)
1541 break;
1542 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1543 const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1544 folded[lenFlat] = 0;
1545 // Does folded match the buffer
1546 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1547 indexDocument += widthChar;
1548 indexSearch += lenFlat;
1550 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1551 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1552 *length = indexDocument;
1553 return pos;
1556 if (!NextCharacter(pos, increment))
1557 break;
1559 } else {
1560 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1561 std::vector<char> searchThing(lengthFind + 1);
1562 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1563 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1564 bool found = (pos + lengthFind) <= limitPos;
1565 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1566 char ch = CharAt(pos + indexSearch);
1567 char folded[2];
1568 pcf->Fold(folded, sizeof(folded), &ch, 1);
1569 found = folded[0] == searchThing[indexSearch];
1571 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1572 return pos;
1574 if (!NextCharacter(pos, increment))
1575 break;
1579 //Platform::DebugPrintf("Not found\n");
1580 return -1;
1583 const char *Document::SubstituteByPosition(const char *text, int *length) {
1584 if (regex)
1585 return regex->SubstituteByPosition(this, text, length);
1586 else
1587 return 0;
1590 int Document::LinesTotal() const {
1591 return cb.Lines();
1594 void Document::ChangeCase(Range r, bool makeUpperCase) {
1595 for (int pos = r.start; pos < r.end;) {
1596 int len = LenChar(pos);
1597 if (len == 1) {
1598 char ch = CharAt(pos);
1599 if (makeUpperCase) {
1600 if (IsLowerCase(ch)) {
1601 ChangeChar(pos, static_cast<char>(MakeUpperCase(ch)));
1603 } else {
1604 if (IsUpperCase(ch)) {
1605 ChangeChar(pos, static_cast<char>(MakeLowerCase(ch)));
1609 pos += len;
1613 void Document::SetDefaultCharClasses(bool includeWordClass) {
1614 charClass.SetDefaultCharClasses(includeWordClass);
1617 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1618 charClass.SetCharClasses(chars, newCharClass);
1621 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1622 return charClass.GetCharsOfClass(characterClass, buffer);
1625 void Document::SetStylingBits(int bits) {
1626 stylingBits = bits;
1627 stylingBitsMask = (1 << stylingBits) - 1;
1630 void SCI_METHOD Document::StartStyling(int position, char mask) {
1631 stylingMask = mask;
1632 endStyled = position;
1635 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1636 if (enteredStyling != 0) {
1637 return false;
1638 } else {
1639 enteredStyling++;
1640 style &= stylingMask;
1641 int prevEndStyled = endStyled;
1642 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1643 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1644 prevEndStyled, length);
1645 NotifyModified(mh);
1647 endStyled += length;
1648 enteredStyling--;
1649 return true;
1653 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1654 if (enteredStyling != 0) {
1655 return false;
1656 } else {
1657 enteredStyling++;
1658 bool didChange = false;
1659 int startMod = 0;
1660 int endMod = 0;
1661 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1662 PLATFORM_ASSERT(endStyled < Length());
1663 if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1664 if (!didChange) {
1665 startMod = endStyled;
1667 didChange = true;
1668 endMod = endStyled;
1671 if (didChange) {
1672 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1673 startMod, endMod - startMod + 1);
1674 NotifyModified(mh);
1676 enteredStyling--;
1677 return true;
1681 void Document::EnsureStyledTo(int pos) {
1682 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1683 IncrementStyleClock();
1684 if (pli && !pli->UseContainerLexing()) {
1685 int lineEndStyled = LineFromPosition(GetEndStyled());
1686 int endStyledTo = LineStart(lineEndStyled);
1687 pli->Colourise(endStyledTo, pos);
1688 } else {
1689 // Ask the watchers to style, and stop as soon as one responds.
1690 for (int i = 0; pos > GetEndStyled() && i < lenWatchers; i++) {
1691 watchers[i].watcher->NotifyStyleNeeded(this, watchers[i].userData, pos);
1697 void Document::LexerChanged() {
1698 // Tell the watchers the lexer has changed.
1699 for (int i = 0; i < lenWatchers; i++) {
1700 watchers[i].watcher->NotifyLexerChanged(this, watchers[i].userData);
1704 int SCI_METHOD Document::SetLineState(int line, int state) {
1705 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1706 if (state != statePrevious) {
1707 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1708 NotifyModified(mh);
1710 return statePrevious;
1713 int SCI_METHOD Document::GetLineState(int line) const {
1714 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1717 int Document::GetMaxLineState() {
1718 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1721 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1722 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1723 NotifyModified(mh);
1726 StyledText Document::MarginStyledText(int line) {
1727 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1728 return StyledText(pla->Length(line), pla->Text(line),
1729 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1732 void Document::MarginSetText(int line, const char *text) {
1733 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1734 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1735 NotifyModified(mh);
1738 void Document::MarginSetStyle(int line, int style) {
1739 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1740 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1743 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1744 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1745 NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1748 int Document::MarginLength(int line) const {
1749 return static_cast<LineAnnotation *>(perLineData[ldMargin])->Length(line);
1752 void Document::MarginClearAll() {
1753 int maxEditorLine = LinesTotal();
1754 for (int l=0; l<maxEditorLine; l++)
1755 MarginSetText(l, 0);
1756 // Free remaining data
1757 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1760 bool Document::AnnotationAny() const {
1761 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->AnySet();
1764 StyledText Document::AnnotationStyledText(int line) {
1765 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1766 return StyledText(pla->Length(line), pla->Text(line),
1767 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1770 void Document::AnnotationSetText(int line, const char *text) {
1771 if (line >= 0 && line < LinesTotal()) {
1772 const int linesBefore = AnnotationLines(line);
1773 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1774 const int linesAfter = AnnotationLines(line);
1775 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1776 mh.annotationLinesAdded = linesAfter - linesBefore;
1777 NotifyModified(mh);
1781 void Document::AnnotationSetStyle(int line, int style) {
1782 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1783 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1784 NotifyModified(mh);
1787 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1788 if (line >= 0 && line < LinesTotal()) {
1789 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1793 int Document::AnnotationLength(int line) const {
1794 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Length(line);
1797 int Document::AnnotationLines(int line) const {
1798 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1801 void Document::AnnotationClearAll() {
1802 int maxEditorLine = LinesTotal();
1803 for (int l=0; l<maxEditorLine; l++)
1804 AnnotationSetText(l, 0);
1805 // Free remaining data
1806 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1809 void Document::IncrementStyleClock() {
1810 styleClock = (styleClock + 1) % 0x100000;
1813 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1814 if (decorations.FillRange(position, value, fillLength)) {
1815 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1816 position, fillLength);
1817 NotifyModified(mh);
1821 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1822 for (int i = 0; i < lenWatchers; i++) {
1823 if ((watchers[i].watcher == watcher) &&
1824 (watchers[i].userData == userData))
1825 return false;
1827 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers + 1];
1828 for (int j = 0; j < lenWatchers; j++)
1829 pwNew[j] = watchers[j];
1830 pwNew[lenWatchers].watcher = watcher;
1831 pwNew[lenWatchers].userData = userData;
1832 delete []watchers;
1833 watchers = pwNew;
1834 lenWatchers++;
1835 return true;
1838 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1839 for (int i = 0; i < lenWatchers; i++) {
1840 if ((watchers[i].watcher == watcher) &&
1841 (watchers[i].userData == userData)) {
1842 if (lenWatchers == 1) {
1843 delete []watchers;
1844 watchers = 0;
1845 lenWatchers = 0;
1846 } else {
1847 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers];
1848 for (int j = 0; j < lenWatchers - 1; j++) {
1849 pwNew[j] = (j < i) ? watchers[j] : watchers[j + 1];
1851 delete []watchers;
1852 watchers = pwNew;
1853 lenWatchers--;
1855 return true;
1858 return false;
1861 void Document::NotifyModifyAttempt() {
1862 for (int i = 0; i < lenWatchers; i++) {
1863 watchers[i].watcher->NotifyModifyAttempt(this, watchers[i].userData);
1867 void Document::NotifySavePoint(bool atSavePoint) {
1868 for (int i = 0; i < lenWatchers; i++) {
1869 watchers[i].watcher->NotifySavePoint(this, watchers[i].userData, atSavePoint);
1873 void Document::NotifyModified(DocModification mh) {
1874 if (mh.modificationType & SC_MOD_INSERTTEXT) {
1875 decorations.InsertSpace(mh.position, mh.length);
1876 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1877 decorations.DeleteRange(mh.position, mh.length);
1879 for (int i = 0; i < lenWatchers; i++) {
1880 watchers[i].watcher->NotifyModified(this, mh, watchers[i].userData);
1884 bool Document::IsWordPartSeparator(char ch) {
1885 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1888 int Document::WordPartLeft(int pos) {
1889 if (pos > 0) {
1890 --pos;
1891 char startChar = cb.CharAt(pos);
1892 if (IsWordPartSeparator(startChar)) {
1893 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1894 --pos;
1897 if (pos > 0) {
1898 startChar = cb.CharAt(pos);
1899 --pos;
1900 if (IsLowerCase(startChar)) {
1901 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1902 --pos;
1903 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1904 ++pos;
1905 } else if (IsUpperCase(startChar)) {
1906 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1907 --pos;
1908 if (!IsUpperCase(cb.CharAt(pos)))
1909 ++pos;
1910 } else if (IsADigit(startChar)) {
1911 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1912 --pos;
1913 if (!IsADigit(cb.CharAt(pos)))
1914 ++pos;
1915 } else if (IsPunctuation(startChar)) {
1916 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1917 --pos;
1918 if (!IsPunctuation(cb.CharAt(pos)))
1919 ++pos;
1920 } else if (isspacechar(startChar)) {
1921 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1922 --pos;
1923 if (!isspacechar(cb.CharAt(pos)))
1924 ++pos;
1925 } else if (!isascii(startChar)) {
1926 while (pos > 0 && !isascii(cb.CharAt(pos)))
1927 --pos;
1928 if (isascii(cb.CharAt(pos)))
1929 ++pos;
1930 } else {
1931 ++pos;
1935 return pos;
1938 int Document::WordPartRight(int pos) {
1939 char startChar = cb.CharAt(pos);
1940 int length = Length();
1941 if (IsWordPartSeparator(startChar)) {
1942 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1943 ++pos;
1944 startChar = cb.CharAt(pos);
1946 if (!isascii(startChar)) {
1947 while (pos < length && !isascii(cb.CharAt(pos)))
1948 ++pos;
1949 } else if (IsLowerCase(startChar)) {
1950 while (pos < length && IsLowerCase(cb.CharAt(pos)))
1951 ++pos;
1952 } else if (IsUpperCase(startChar)) {
1953 if (IsLowerCase(cb.CharAt(pos + 1))) {
1954 ++pos;
1955 while (pos < length && IsLowerCase(cb.CharAt(pos)))
1956 ++pos;
1957 } else {
1958 while (pos < length && IsUpperCase(cb.CharAt(pos)))
1959 ++pos;
1961 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
1962 --pos;
1963 } else if (IsADigit(startChar)) {
1964 while (pos < length && IsADigit(cb.CharAt(pos)))
1965 ++pos;
1966 } else if (IsPunctuation(startChar)) {
1967 while (pos < length && IsPunctuation(cb.CharAt(pos)))
1968 ++pos;
1969 } else if (isspacechar(startChar)) {
1970 while (pos < length && isspacechar(cb.CharAt(pos)))
1971 ++pos;
1972 } else {
1973 ++pos;
1975 return pos;
// True for the two line end characters, LF and CR.
bool IsLineEndChar(char c) {
    switch (c) {
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
1982 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
1983 int sStart = cb.StyleAt(pos);
1984 if (delta < 0) {
1985 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1986 pos--;
1987 pos++;
1988 } else {
1989 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1990 pos++;
1992 return pos;
// Return the partner of a brace character: () [] {} and <> are paired.
// Any other character, including NUL, yields '\0'.
static char BraceOpposite(char ch) {
    // Partners sit next to each other, so flipping the lowest index bit swaps them.
    static const char pairs[] = "()[]{}<>";
    for (int i = 0; pairs[i] != '\0'; i++) {
        if (pairs[i] == ch)
            return pairs[i ^ 1];
    }
    return '\0';
}
2018 // TODO: should be able to extend styled region to find matching brace
2019 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2020 char chBrace = CharAt(position);
2021 char chSeek = BraceOpposite(chBrace);
2022 if (chSeek == '\0')
2023 return - 1;
2024 char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
2025 int direction = -1;
2026 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2027 direction = 1;
2028 int depth = 1;
2029 position = NextPosition(position, direction);
2030 while ((position >= 0) && (position < Length())) {
2031 char chAtPos = CharAt(position);
2032 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
2033 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2034 if (chAtPos == chBrace)
2035 depth++;
2036 if (chAtPos == chSeek)
2037 depth--;
2038 if (depth == 0)
2039 return position;
2041 int positionBeforeMove = position;
2042 position = NextPosition(position, direction);
2043 if (position == positionBeforeMove)
2044 break;
2046 return - 1;
2050 * Implementation of RegexSearchBase for the default built-in regular expression engine
2052 class BuiltinRegex : public RegexSearchBase {
2053 public:
2054 BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {}
2056 virtual ~BuiltinRegex() {
2057 delete substituted;
2060 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2061 bool caseSensitive, bool word, bool wordStart, int flags,
2062 int *length);
2064 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2066 private:
2067 RESearch search;
2068 char *substituted;
2071 // Define a way for the Regular Expression code to access the document
2072 class DocumentIndexer : public CharacterIndexer {
2073 Document *pdoc;
2074 int end;
2075 public:
2076 DocumentIndexer(Document *pdoc_, int end_) :
2077 pdoc(pdoc_), end(end_) {
2080 virtual ~DocumentIndexer() {
2083 virtual char CharAt(int index) {
2084 if (index < 0 || index >= end)
2085 return 0;
2086 else
2087 return pdoc->CharAt(index);
2091 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2092 bool caseSensitive, bool, bool, int flags,
2093 int *length) {
2094 bool posix = (flags & SCFIND_POSIX) != 0;
2095 int increment = (minPos <= maxPos) ? 1 : -1;
2097 int startPos = minPos;
2098 int endPos = maxPos;
2100 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2101 startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2102 endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2104 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2105 if (errmsg) {
2106 return -1;
2108 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2109 // Replace first '.' with '-' in each property file variable reference:
2110 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2111 // Replace: $(\1-\2)
2112 int lineRangeStart = doc->LineFromPosition(startPos);
2113 int lineRangeEnd = doc->LineFromPosition(endPos);
2114 if ((increment == 1) &&
2115 (startPos >= doc->LineEnd(lineRangeStart)) &&
2116 (lineRangeStart < lineRangeEnd)) {
2117 // the start position is at end of line or between line end characters.
2118 lineRangeStart++;
2119 startPos = doc->LineStart(lineRangeStart);
2120 } else if ((increment == -1) &&
2121 (startPos <= doc->LineStart(lineRangeStart)) &&
2122 (lineRangeStart > lineRangeEnd)) {
2123 // the start position is at beginning of line.
2124 lineRangeStart--;
2125 startPos = doc->LineEnd(lineRangeStart);
2127 int pos = -1;
2128 int lenRet = 0;
2129 char searchEnd = s[*length - 1];
2130 char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2131 int lineRangeBreak = lineRangeEnd + increment;
2132 for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2133 int startOfLine = doc->LineStart(line);
2134 int endOfLine = doc->LineEnd(line);
2135 if (increment == 1) {
2136 if (line == lineRangeStart) {
2137 if ((startPos != startOfLine) && (s[0] == '^'))
2138 continue; // Can't match start of line if start position after start of line
2139 startOfLine = startPos;
2141 if (line == lineRangeEnd) {
2142 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2143 continue; // Can't match end of line if end position before end of line
2144 endOfLine = endPos;
2146 } else {
2147 if (line == lineRangeEnd) {
2148 if ((endPos != startOfLine) && (s[0] == '^'))
2149 continue; // Can't match start of line if end position after start of line
2150 startOfLine = endPos;
2152 if (line == lineRangeStart) {
2153 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2154 continue; // Can't match end of line if start position before end of line
2155 endOfLine = startPos;
2159 DocumentIndexer di(doc, endOfLine);
2160 int success = search.Execute(di, startOfLine, endOfLine);
2161 if (success) {
2162 pos = search.bopat[0];
2163 lenRet = search.eopat[0] - search.bopat[0];
2164 // There can be only one start of a line, so no need to look for last match in line
2165 if ((increment == -1) && (s[0] != '^')) {
2166 // Check for the last match on this line.
2167 int repetitions = 1000; // Break out of infinite loop
2168 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2169 success = search.Execute(di, pos+1, endOfLine);
2170 if (success) {
2171 if (search.eopat[0] <= minPos) {
2172 pos = search.bopat[0];
2173 lenRet = search.eopat[0] - search.bopat[0];
2174 } else {
2175 success = 0;
2180 break;
2183 *length = lenRet;
2184 return pos;
2187 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2188 delete []substituted;
2189 substituted = 0;
2190 DocumentIndexer di(doc, doc->Length());
2191 if (!search.GrabMatches(di))
2192 return 0;
2193 unsigned int lenResult = 0;
2194 for (int i = 0; i < *length; i++) {
2195 if (text[i] == '\\') {
2196 if (text[i + 1] >= '0' && text[i + 1] <= '9') {
2197 unsigned int patNum = text[i + 1] - '0';
2198 lenResult += search.eopat[patNum] - search.bopat[patNum];
2199 i++;
2200 } else {
2201 switch (text[i + 1]) {
2202 case 'a':
2203 case 'b':
2204 case 'f':
2205 case 'n':
2206 case 'r':
2207 case 't':
2208 case 'v':
2209 case '\\':
2210 i++;
2212 lenResult++;
2214 } else {
2215 lenResult++;
2218 substituted = new char[lenResult + 1];
2219 char *o = substituted;
2220 for (int j = 0; j < *length; j++) {
2221 if (text[j] == '\\') {
2222 if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2223 unsigned int patNum = text[j + 1] - '0';
2224 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2225 if (search.pat[patNum]) // Will be null if try for a match that did not occur
2226 memcpy(o, search.pat[patNum], len);
2227 o += len;
2228 j++;
2229 } else {
2230 j++;
2231 switch (text[j]) {
2232 case 'a':
2233 *o++ = '\a';
2234 break;
2235 case 'b':
2236 *o++ = '\b';
2237 break;
2238 case 'f':
2239 *o++ = '\f';
2240 break;
2241 case 'n':
2242 *o++ = '\n';
2243 break;
2244 case 'r':
2245 *o++ = '\r';
2246 break;
2247 case 't':
2248 *o++ = '\t';
2249 break;
2250 case 'v':
2251 *o++ = '\v';
2252 break;
2253 case '\\':
2254 *o++ = '\\';
2255 break;
2256 default:
2257 *o++ = '\\';
2258 j--;
2261 } else {
2262 *o++ = text[j];
2265 *o = '\0';
2266 *length = lenResult;
2267 return substituted;
2270 #ifndef SCI_OWNREGEX
2272 #ifdef SCI_NAMESPACE
2274 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2275 return new BuiltinRegex(charClassTable);
2278 #else
2280 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2281 return new BuiltinRegex(charClassTable);
2284 #endif
2286 #endif