scintilla: Update scintilla with changeset 3662:1d1c06df8a2f using gtk+3
[anjuta-extras.git] / plugins / scintilla / scintilla / Document.cxx
blob7b718f27258980d52bdaaabae3fa0170033dcd51
1 // Scintilla source code edit control
2 /** @file Document.cxx
3 ** Text document that handles notifications, DBCS, styling, words and end of line.
4 **/
5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <ctype.h>
12 #include <assert.h>
14 #include <string>
15 #include <vector>
17 #include "Platform.h"
19 #include "ILexer.h"
20 #include "Scintilla.h"
22 #include "SplitVector.h"
23 #include "Partitioning.h"
24 #include "RunStyles.h"
25 #include "CellBuffer.h"
26 #include "PerLine.h"
27 #include "CharClassify.h"
28 #include "CharacterSet.h"
29 #include "Decoration.h"
30 #include "Document.h"
31 #include "RESearch.h"
32 #include "UniConversion.h"
34 #ifdef SCI_NAMESPACE
35 using namespace Scintilla;
36 #endif
// ASCII-only whitespace test: a space, or any byte in the control range
// 0x09 (TAB) .. 0x0d (CR). Bytes >= 0x80 never match, so it is safe to
// call on bytes taken from multi-byte text.
static inline bool isspacechar(unsigned char ch) {
	if (ch == ' ')
		return true;
	return (ch >= 0x09) && (ch <= 0x0d);
}
// True only for ASCII punctuation. The isascii guard rejects every
// non-ASCII byte before ispunct is consulted.
static inline bool IsPunctuation(char ch) {
	if (!isascii(ch))
		return false;
	return ispunct(ch) != 0;
}
// True only for the ASCII digits; non-ASCII bytes are rejected before
// isdigit is consulted.
static inline bool IsADigit(char ch) {
	if (!isascii(ch))
		return false;
	return isdigit(ch) != 0;
}
// True only for ASCII lower case letters; non-ASCII bytes are rejected
// before islower is consulted.
static inline bool IsLowerCase(char ch) {
	if (!isascii(ch))
		return false;
	return islower(ch) != 0;
}
// True only for ASCII upper case letters; non-ASCII bytes are rejected
// before isupper is consulted.
static inline bool IsUpperCase(char ch) {
	if (!isascii(ch))
		return false;
	return isupper(ch) != 0;
}
59 void LexInterface::Colourise(int start, int end) {
60 ElapsedTime et;
61 if (pdoc && instance && !performingStyle) {
62 // Protect against reentrance, which may occur, for example, when
63 // fold points are discovered while performing styling and the folding
64 // code looks for child lines which may trigger styling.
65 performingStyle = true;
67 int lengthDoc = pdoc->Length();
68 if (end == -1)
69 end = lengthDoc;
70 int len = end - start;
72 PLATFORM_ASSERT(len >= 0);
73 PLATFORM_ASSERT(start + len <= lengthDoc);
75 int styleStart = 0;
76 if (start > 0)
77 styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
79 if (len > 0) {
80 instance->Lex(start, len, styleStart, pdoc);
81 instance->Fold(start, len, styleStart, pdoc);
84 performingStyle = false;
88 Document::Document() {
89 refCount = 0;
90 #ifdef __unix__
91 eolMode = SC_EOL_LF;
92 #else
93 eolMode = SC_EOL_CRLF;
94 #endif
95 dbcsCodePage = 0;
96 stylingBits = 5;
97 stylingBitsMask = 0x1F;
98 stylingMask = 0;
99 endStyled = 0;
100 styleClock = 0;
101 enteredModification = 0;
102 enteredStyling = 0;
103 enteredReadOnlyCount = 0;
104 tabInChars = 8;
105 indentInChars = 0;
106 actualIndentInChars = 8;
107 useTabs = true;
108 tabIndents = true;
109 backspaceUnindents = false;
110 watchers = 0;
111 lenWatchers = 0;
113 matchesValid = false;
114 regex = 0;
116 perLineData[ldMarkers] = new LineMarkers();
117 perLineData[ldLevels] = new LineLevels();
118 perLineData[ldState] = new LineState();
119 perLineData[ldMargin] = new LineAnnotation();
120 perLineData[ldAnnotation] = new LineAnnotation();
122 cb.SetPerLine(this);
124 pli = 0;
127 Document::~Document() {
128 for (int i = 0; i < lenWatchers; i++) {
129 watchers[i].watcher->NotifyDeleted(this, watchers[i].userData);
131 delete []watchers;
132 for (int j=0; j<ldSize; j++) {
133 delete perLineData[j];
134 perLineData[j] = 0;
136 watchers = 0;
137 lenWatchers = 0;
138 delete regex;
139 regex = 0;
140 delete pli;
141 pli = 0;
144 void Document::Init() {
145 for (int j=0; j<ldSize; j++) {
146 if (perLineData[j])
147 perLineData[j]->Init();
151 void Document::InsertLine(int line) {
152 for (int j=0; j<ldSize; j++) {
153 if (perLineData[j])
154 perLineData[j]->InsertLine(line);
158 void Document::RemoveLine(int line) {
159 for (int j=0; j<ldSize; j++) {
160 if (perLineData[j])
161 perLineData[j]->RemoveLine(line);
165 // Increase reference count and return its previous value.
166 int Document::AddRef() {
167 return refCount++;
170 // Decrease reference count and return its previous value.
171 // Delete the document if reference count reaches zero.
172 int Document::Release() {
173 int curRefCount = --refCount;
174 if (curRefCount == 0)
175 delete this;
176 return curRefCount;
179 void Document::SetSavePoint() {
180 cb.SetSavePoint();
181 NotifySavePoint(true);
184 int Document::GetMark(int line) {
185 return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
188 int Document::AddMark(int line, int markerNum) {
189 if (line >= 0 && line <= LinesTotal()) {
190 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
191 AddMark(line, markerNum, LinesTotal());
192 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
193 NotifyModified(mh);
194 return prev;
195 } else {
196 return 0;
200 void Document::AddMarkSet(int line, int valueSet) {
201 if (line < 0 || line > LinesTotal()) {
202 return;
204 unsigned int m = valueSet;
205 for (int i = 0; m; i++, m >>= 1)
206 if (m & 1)
207 static_cast<LineMarkers *>(perLineData[ldMarkers])->
208 AddMark(line, i, LinesTotal());
209 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
210 NotifyModified(mh);
213 void Document::DeleteMark(int line, int markerNum) {
214 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
215 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
216 NotifyModified(mh);
219 void Document::DeleteMarkFromHandle(int markerHandle) {
220 static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
221 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
222 mh.line = -1;
223 NotifyModified(mh);
226 void Document::DeleteAllMarks(int markerNum) {
227 bool someChanges = false;
228 for (int line = 0; line < LinesTotal(); line++) {
229 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
230 someChanges = true;
232 if (someChanges) {
233 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
234 mh.line = -1;
235 NotifyModified(mh);
239 int Document::LineFromHandle(int markerHandle) {
240 return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
243 int SCI_METHOD Document::LineStart(int line) const {
244 return cb.LineStart(line);
247 int Document::LineEnd(int line) const {
248 if (line == LinesTotal() - 1) {
249 return LineStart(line + 1);
250 } else {
251 int position = LineStart(line + 1) - 1;
252 // When line terminator is CR+LF, may need to go back one more
253 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
254 position--;
256 return position;
260 void SCI_METHOD Document::SetErrorStatus(int status) {
261 // Tell the watchers the lexer has changed.
262 for (int i = 0; i < lenWatchers; i++) {
263 watchers[i].watcher->NotifyErrorOccurred(this, watchers[i].userData, status);
267 int SCI_METHOD Document::LineFromPosition(int pos) const {
268 return cb.LineFromPosition(pos);
271 int Document::LineEndPosition(int position) const {
272 return LineEnd(LineFromPosition(position));
275 bool Document::IsLineEndPosition(int position) const {
276 return LineEnd(LineFromPosition(position)) == position;
279 int Document::VCHomePosition(int position) const {
280 int line = LineFromPosition(position);
281 int startPosition = LineStart(line);
282 int endLine = LineEnd(line);
283 int startText = startPosition;
284 while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
285 startText++;
286 if (position == startText)
287 return startPosition;
288 else
289 return startText;
292 int SCI_METHOD Document::SetLevel(int line, int level) {
293 int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
294 if (prev != level) {
295 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
296 LineStart(line), 0, 0, 0, line);
297 mh.foldLevelNow = level;
298 mh.foldLevelPrev = prev;
299 NotifyModified(mh);
301 return prev;
304 int SCI_METHOD Document::GetLevel(int line) const {
305 return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
308 void Document::ClearLevels() {
309 static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
312 static bool IsSubordinate(int levelStart, int levelTry) {
313 if (levelTry & SC_FOLDLEVELWHITEFLAG)
314 return true;
315 else
316 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
319 int Document::GetLastChild(int lineParent, int level) {
320 if (level == -1)
321 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
322 int maxLine = LinesTotal();
323 int lineMaxSubord = lineParent;
324 while (lineMaxSubord < maxLine - 1) {
325 EnsureStyledTo(LineStart(lineMaxSubord + 2));
326 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
327 break;
328 lineMaxSubord++;
330 if (lineMaxSubord > lineParent) {
331 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
332 // Have chewed up some whitespace that belongs to a parent so seek back
333 if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
334 lineMaxSubord--;
338 return lineMaxSubord;
341 int Document::GetFoldParent(int line) {
342 int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
343 int lineLook = line - 1;
344 while ((lineLook > 0) && (
345 (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
346 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
348 lineLook--;
350 if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
351 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
352 return lineLook;
353 } else {
354 return -1;
// Compute, for the fold block around "line", four line numbers and store
// them in highlightDelimiter: beginFoldBlock, endFoldBlock,
// beginMarginCorrectlyDrawnZone and endMarginCorrectlyDrawnZone.
// The scan is bounded by noNeedToParseBefore / noNeedToParseAfter, which
// are derived from line, topLine and bottomLine. The backward scan may run
// past its bound while it is still on lines flagged as white space.
// beginFoldBlock / endFoldBlock are set to -1 when a base-level line is met
// before a block boundary has been found.
358 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int topLine, int bottomLine) {
359 int noNeedToParseBefore = Platform::Minimum(line, topLine) - 1;
360 int noNeedToParseAfter = Platform::Maximum(line, bottomLine) + 1;
361 int endLine = LineFromPosition(Length());
362 int beginFoldBlock = noNeedToParseBefore;
363 int endFoldBlock = -1;
364 int beginMarginCorrectlyDrawnZone = noNeedToParseBefore;
365 int endMarginCorrectlyDrawnZone = noNeedToParseAfter;
366 int endOfTailOfWhiteFlag = -1; //endOfTailOfWhiteFlag points the last SC_FOLDLEVELWHITEFLAG if follow a fold block. Otherwise endOfTailOfWhiteFlag points end of fold block.
367 int level = GetLevel(line);
// levelNumber stays -1 until the backward scan meets its first line that is
// not flagged as white space; that line fixes the reference level number.
368 int levelNumber = -1;
369 int lineLookLevel = 0;
370 int lineLookLevelNumber = -1;
371 int lineLook = line;
372 bool beginFoldBlockFound = false;
373 bool endFoldBlockFound = false;
374 bool beginMarginCorrectlyDrawnZoneFound = false;
375 bool endMarginCorrectlyDrawnZoneFound = false;
377 /*******************************************************************************/
378 /* search backward (beginFoldBlock & beginMarginCorrectlyDrawnZone) */
379 /*******************************************************************************/
380 for (endOfTailOfWhiteFlag = line; (lineLook > noNeedToParseBefore || (lineLookLevel & SC_FOLDLEVELWHITEFLAG)) && (!beginFoldBlockFound || !beginMarginCorrectlyDrawnZoneFound); --lineLook) {
381 lineLookLevel = GetLevel(lineLook);
// Reference level already known: compare this line against it.
382 if (levelNumber != -1) {
383 lineLookLevelNumber = lineLookLevel & SC_FOLDLEVELNUMBERMASK;
384 if (!beginMarginCorrectlyDrawnZoneFound && (lineLookLevelNumber > levelNumber)) {
385 beginMarginCorrectlyDrawnZoneFound = true;
386 beginMarginCorrectlyDrawnZone = endOfTailOfWhiteFlag;
388 //find the last space line (SC_FOLDLEVELWHITEFLAG).
389 if (!beginMarginCorrectlyDrawnZoneFound && !(lineLookLevel & SC_FOLDLEVELWHITEFLAG)) {
390 endOfTailOfWhiteFlag = lineLook - 1;
392 if (!beginFoldBlockFound && (lineLookLevelNumber < levelNumber)) {
393 beginFoldBlockFound = true;
394 beginFoldBlock = lineLook;
395 if (!beginMarginCorrectlyDrawnZoneFound) {
396 beginMarginCorrectlyDrawnZoneFound = true;
397 beginMarginCorrectlyDrawnZone = lineLook - 1;
399 } else if (!beginFoldBlockFound && lineLookLevelNumber == SC_FOLDLEVELBASE) {
400 beginFoldBlockFound = true;
401 beginFoldBlock = -1;
// Reference level not yet known: the first line not flagged as white space sets it.
403 } else if (!(lineLookLevel & SC_FOLDLEVELWHITEFLAG)) {
404 endOfTailOfWhiteFlag = lineLook - 1;
405 levelNumber = lineLookLevel & SC_FOLDLEVELNUMBERMASK;
406 if (lineLookLevel & SC_FOLDLEVELHEADERFLAG &&
407 //Managed the folding block when a fold header does not have any subordinate lines to fold away.
408 (levelNumber < (GetLevel(lineLook + 1) & SC_FOLDLEVELNUMBERMASK))) {
409 beginFoldBlockFound = true;
410 beginFoldBlock = lineLook;
411 beginMarginCorrectlyDrawnZoneFound = true;
412 beginMarginCorrectlyDrawnZone = endOfTailOfWhiteFlag;
413 levelNumber = GetLevel(lineLook + 1) & SC_FOLDLEVELNUMBERMASK;;
418 /****************************************************************************/
419 /* search forward (endStartBlock & endMarginCorrectlyDrawnZone) */
420 /****************************************************************************/
421 if (level & SC_FOLDLEVELHEADERFLAG) {
422 //ignore this line because this line is on first one of block.
423 lineLook = line + 1;
424 } else {
425 lineLook = line;
427 for (; lineLook < noNeedToParseAfter && (!endFoldBlockFound || !endMarginCorrectlyDrawnZoneFound); ++lineLook) {
428 lineLookLevel = GetLevel(lineLook);
429 lineLookLevelNumber = lineLookLevel & SC_FOLDLEVELNUMBERMASK;
430 if (!endFoldBlockFound && !(lineLookLevel & SC_FOLDLEVELWHITEFLAG) && lineLookLevelNumber < levelNumber) {
431 endFoldBlockFound = true;
432 endFoldBlock = lineLook - 1;
433 if (!endMarginCorrectlyDrawnZoneFound) {
434 endMarginCorrectlyDrawnZoneFound = true;
435 endMarginCorrectlyDrawnZone = lineLook;
// NOTE(review): this compares the whole level value (flags included) with
// SC_FOLDLEVELBASE, whereas the backward scan compares only the level
// number - confirm the difference is intended.
437 } else if (!endFoldBlockFound && lineLookLevel == SC_FOLDLEVELBASE) {
438 endFoldBlockFound = true;
439 endFoldBlock = -1;
441 if (!endMarginCorrectlyDrawnZoneFound && (lineLookLevel & SC_FOLDLEVELHEADERFLAG) &&
442 //Managed the folding block when a fold header does not have any subordinate lines to fold away.
443 (levelNumber < (GetLevel(lineLook + 1) & SC_FOLDLEVELNUMBERMASK))) {
444 endMarginCorrectlyDrawnZoneFound = true;
445 endMarginCorrectlyDrawnZone = lineLook;
448 if (!endFoldBlockFound && ((lineLook > endLine && lineLookLevelNumber < levelNumber) ||
449 (levelNumber > SC_FOLDLEVELBASE))) {
450 //manage when endfold is incorrect or on last line.
451 endFoldBlock = lineLook - 1;
452 //useless to set endMarginCorrectlyDrawnZone.
453 //if endMarginCorrectlyDrawnZoneFound equals false then endMarginCorrectlyDrawnZone already equals to endLine + 1.
// Publish the results.
456 highlightDelimiter.beginFoldBlock = beginFoldBlock;
457 highlightDelimiter.endFoldBlock = endFoldBlock;
458 highlightDelimiter.beginMarginCorrectlyDrawnZone = beginMarginCorrectlyDrawnZone;
459 highlightDelimiter.endMarginCorrectlyDrawnZone = endMarginCorrectlyDrawnZone;
462 int Document::ClampPositionIntoDocument(int pos) {
463 return Platform::Clamp(pos, 0, Length());
466 bool Document::IsCrLf(int pos) {
467 if (pos < 0)
468 return false;
469 if (pos >= (Length() - 1))
470 return false;
471 return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
474 int Document::LenChar(int pos) {
475 if (pos < 0) {
476 return 1;
477 } else if (IsCrLf(pos)) {
478 return 2;
479 } else if (SC_CP_UTF8 == dbcsCodePage) {
480 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
481 if (ch < 0x80)
482 return 1;
483 int len = 2;
484 if (ch >= (0x80 + 0x40 + 0x20 + 0x10))
485 len = 4;
486 else if (ch >= (0x80 + 0x40 + 0x20))
487 len = 3;
488 int lengthDoc = Length();
489 if ((pos + len) > lengthDoc)
490 return lengthDoc -pos;
491 else
492 return len;
493 } else if (dbcsCodePage) {
494 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
495 } else {
496 return 1;
// True for values in the UTF-8 trail byte range 0x80 .. 0xBF.
static bool IsTrailByte(int ch) {
	if (ch < 0x80)
		return false;
	return ch < 0xC0;
}
// Total length in bytes (2, 3 or 4) of the UTF-8 sequence introduced by
// leadByte, or 0 when leadByte cannot start a multi-byte sequence:
// anything below 0xC2 (ASCII, trail bytes, 0xC0 and 0xC1) or above 0xF4
// (characters longer than 4 bytes are not possible in current UTF-8).
static int BytesFromLead(int leadByte) {
	if ((leadByte < 0xC2) || (leadByte > 0xF4))
		return 0;
	if (leadByte >= 0xF0)
		return 4;
	if (leadByte >= 0xE0)
		return 3;
	return 2;
}
// Check whether pos lies on a trail byte of a plausible UTF-8 sequence.
// On success returns true with start set to the position of the lead byte
// and end set to one past the last byte of the sequence.
// start is assigned on every path; end is assigned only on success.
// Trail bytes that would lie past the end of the document are not checked.
518 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
519 int lead = pos;
// Step back over preceding trail bytes, but no further than 4 bytes.
520 while ((lead>0) && (pos-lead < 4) && IsTrailByte(static_cast<unsigned char>(cb.CharAt(lead-1))))
521 lead--;
// The candidate lead byte is the byte just before the run of trail bytes.
522 start = 0;
523 if (lead > 0) {
524 start = lead-1;
526 int leadByte = static_cast<unsigned char>(cb.CharAt(start));
527 int bytes = BytesFromLead(leadByte);
// BytesFromLead returns 0 for a byte that cannot start a multi-byte sequence.
528 if (bytes == 0) {
529 return false;
530 } else {
531 int trailBytes = bytes - 1;
532 int len = pos - lead + 1;
533 if (len > trailBytes)
534 // pos too far from lead
535 return false;
536 // Check that there are enough trails for this lead
537 int trail = pos + 1;
538 while ((trail-lead<trailBytes) && (trail < Length())) {
539 if (!IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail)))) {
540 return false;
542 trail++;
544 end = start + bytes;
545 return true;
549 // Normalise a position so that it is not halfway through a two byte character.
550 // This can occur in two situations -
551 // When lines are terminated with \r\n pairs which should be treated as one character.
552 // When displaying DBCS text such as Japanese.
553 // If moving, move the position in the indicated direction.
// moveDir > 0 moves a mid-character position forward to the end of the
// character; any other value moves it back to the start of the character.
// checkLineEnd enables the \r\n handling; when false only the encoding is
// considered.
// Positions at or beyond either end of the document are clamped to 0 or Length().
554 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
555 //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
556 // If out of range, just return minimum/maximum value.
557 if (pos <= 0)
558 return 0;
559 if (pos >= Length())
560 return Length();
562 // PLATFORM_ASSERT(pos > 0 && pos < Length());
// pos is between the \r and the \n of a pair when the pair starts at pos - 1.
563 if (checkLineEnd && IsCrLf(pos - 1)) {
564 if (moveDir > 0)
565 return pos + 1;
566 else
567 return pos - 1;
570 if (dbcsCodePage) {
571 if (SC_CP_UTF8 == dbcsCodePage) {
572 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
573 int startUTF = pos;
574 int endUTF = pos;
// Only adjust when pos is on a trail byte of a sequence InGoodUTF8 accepts;
// otherwise the byte is treated as a character on its own.
575 if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
576 // ch is a trail byte within a UTF-8 character
577 if (moveDir > 0)
578 pos = endUTF;
579 else
580 pos = startUTF;
582 } else {
583 // Anchor DBCS calculations at start of line because start of line can
584 // not be a DBCS trail byte.
585 int posStartLine = LineStart(LineFromPosition(pos));
586 if (pos == posStartLine)
587 return pos;
589 // Step back until a non-lead-byte is found.
590 int posCheck = pos;
591 while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
592 posCheck--;
594 // Check from known start of character.
595 while (posCheck < pos) {
596 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
597 if (posCheck + mbsize == pos) {
598 return pos;
599 } else if (posCheck + mbsize > pos) {
600 if (moveDir > 0) {
601 return posCheck + mbsize;
602 } else {
603 return posCheck;
606 posCheck += mbsize;
611 return pos;
614 // NextPosition moves between valid positions - it can not handle a position in the middle of a
615 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
616 // A \r\n pair is treated as two characters.
// moveDir > 0 steps forward; any other value steps backward.
// The result is clamped to the range 0 .. Length().
617 int Document::NextPosition(int pos, int moveDir) const {
618 // If out of range, just return minimum/maximum value.
619 int increment = (moveDir > 0) ? 1 : -1;
620 if (pos + increment <= 0)
621 return 0;
622 if (pos + increment >= Length())
623 return Length();
625 if (dbcsCodePage) {
626 if (SC_CP_UTF8 == dbcsCodePage) {
// UTF-8: step one byte, then, if that landed on a trail byte of a sequence
// InGoodUTF8 accepts, snap to the sequence boundary in the direction of travel.
627 pos += increment;
628 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
629 int startUTF = pos;
630 int endUTF = pos;
631 if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
632 // ch is a trail byte within a UTF-8 character
633 if (moveDir > 0)
634 pos = endUTF;
635 else
636 pos = startUTF;
638 } else {
639 if (moveDir > 0) {
// DBCS forward: the byte at pos says whether the character is 1 or 2 bytes.
640 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
641 pos += mbsize;
642 if (pos > Length())
643 pos = Length();
644 } else {
645 // Anchor DBCS calculations at start of line because start of line can
646 // not be a DBCS trail byte.
647 int posStartLine = LineStart(LineFromPosition(pos));
648 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
649 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
650 if ((pos - 1) <= posStartLine) {
651 return pos - 1;
652 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
653 // Must actually be trail byte
654 return pos - 2;
655 } else {
656 // Otherwise, step back until a non-lead-byte is found.
657 int posTemp = pos - 1;
// NOTE(review): in the original file this while has an empty body; the line
// holding only ";" is missing from this listing.
658 while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
660 // Now posTemp+1 must point to the beginning of a character,
661 // so figure out whether we went back an even or an odd
662 // number of bytes and go back 1 or 2 bytes, respectively.
663 return (pos - 1 - ((pos - posTemp) & 1));
667 } else {
// Single byte encoding: every byte is a character.
668 pos += increment;
671 return pos;
674 bool Document::NextCharacter(int &pos, int moveDir) {
675 // Returns true if pos changed
676 int posNext = NextPosition(pos, moveDir);
677 if (posNext == pos) {
678 return false;
679 } else {
680 pos = posNext;
681 return true;
685 int SCI_METHOD Document::CodePage() const {
686 return dbcsCodePage;
689 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
690 // Byte ranges found in Wikipedia articles with relevant search strings in each case
691 unsigned char uch = static_cast<unsigned char>(ch);
692 switch (dbcsCodePage) {
693 case 932:
694 // Shift_jis
695 return ((uch >= 0x81) && (uch <= 0x9F)) ||
696 ((uch >= 0xE0) && (uch <= 0xEF));
697 case 936:
698 // GBK
699 return (uch >= 0x81) && (uch <= 0xFE);
700 case 949:
701 // Korean Wansung KS C-5601-1987
702 return (uch >= 0x81) && (uch <= 0xFE);
703 case 950:
704 // Big5
705 return (uch >= 0x81) && (uch <= 0xFE);
706 case 1361:
707 // Korean Johab KS C-5601-1992
708 return
709 ((uch >= 0x84) && (uch <= 0xD3)) ||
710 ((uch >= 0xD8) && (uch <= 0xDE)) ||
711 ((uch >= 0xE0) && (uch <= 0xF9));
713 return false;
// True for a space or a horizontal tab, false for everything else.
inline bool IsSpaceOrTab(int ch) {
	if (ch == ' ')
		return true;
	return ch == '\t';
}
720 // Need to break text into segments near lengthSegment but taking into
721 // account the encoding to not break inside a UTF-8 or DBCS character
722 // and also trying to avoid breaking inside a pair of combining characters.
723 // The segment length must always be long enough (more than 4 bytes)
724 // so that there will be at least one whole character to make a segment.
725 // For UTF-8, text must consist only of valid whole characters.
726 // In preference order from best to worst:
727 // 1) Break after space
728 // 2) Break before punctuation
729 // 3) Break after whole character
731 int Document::SafeSegment(const char *text, int length, int lengthSegment) {
732 if (length <= lengthSegment)
733 return length;
734 int lastSpaceBreak = -1;
735 int lastPunctuationBreak = -1;
736 int lastEncodingAllowedBreak = -1;
737 for (int j=0; j < lengthSegment;) {
738 unsigned char ch = static_cast<unsigned char>(text[j]);
739 if (j > 0) {
740 if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
741 lastSpaceBreak = j;
743 if (ch < 'A') {
744 lastPunctuationBreak = j;
747 lastEncodingAllowedBreak = j;
749 if (dbcsCodePage == SC_CP_UTF8) {
750 j += (ch < 0x80) ? 1 : BytesFromLead(ch);
751 } else if (dbcsCodePage) {
752 j += IsDBCSLeadByte(ch) ? 2 : 1;
753 } else {
754 j++;
757 if (lastSpaceBreak >= 0) {
758 return lastSpaceBreak;
759 } else if (lastPunctuationBreak >= 0) {
760 return lastPunctuationBreak;
762 return lastEncodingAllowedBreak;
765 void Document::ModifiedAt(int pos) {
766 if (endStyled > pos)
767 endStyled = pos;
770 void Document::CheckReadOnly() {
771 if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
772 enteredReadOnlyCount++;
773 NotifyModifyAttempt();
774 enteredReadOnlyCount--;
778 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
779 // SetStyleAt does not change the persistent state of a document
781 bool Document::DeleteChars(int pos, int len) {
782 if (len == 0)
783 return false;
784 if ((pos + len) > Length())
785 return false;
786 CheckReadOnly();
787 if (enteredModification != 0) {
788 return false;
789 } else {
790 enteredModification++;
791 if (!cb.IsReadOnly()) {
792 NotifyModified(
793 DocModification(
794 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
795 pos, len,
796 0, 0));
797 int prevLinesTotal = LinesTotal();
798 bool startSavePoint = cb.IsSavePoint();
799 bool startSequence = false;
800 const char *text = cb.DeleteChars(pos, len, startSequence);
801 if (startSavePoint && cb.IsCollectingUndo())
802 NotifySavePoint(!startSavePoint);
803 if ((pos < Length()) || (pos == 0))
804 ModifiedAt(pos);
805 else
806 ModifiedAt(pos-1);
807 NotifyModified(
808 DocModification(
809 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
810 pos, len,
811 LinesTotal() - prevLinesTotal, text));
813 enteredModification--;
815 return !cb.IsReadOnly();
819 * Insert a string with a length.
821 bool Document::InsertString(int position, const char *s, int insertLength) {
822 if (insertLength <= 0) {
823 return false;
825 CheckReadOnly();
826 if (enteredModification != 0) {
827 return false;
828 } else {
829 enteredModification++;
830 if (!cb.IsReadOnly()) {
831 NotifyModified(
832 DocModification(
833 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
834 position, insertLength,
835 0, s));
836 int prevLinesTotal = LinesTotal();
837 bool startSavePoint = cb.IsSavePoint();
838 bool startSequence = false;
839 const char *text = cb.InsertString(position, s, insertLength, startSequence);
840 if (startSavePoint && cb.IsCollectingUndo())
841 NotifySavePoint(!startSavePoint);
842 ModifiedAt(position);
843 NotifyModified(
844 DocModification(
845 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
846 position, insertLength,
847 LinesTotal() - prevLinesTotal, text));
849 enteredModification--;
851 return !cb.IsReadOnly();
// Undo the steps reported by cb.StartUndo(), notifying watchers before and
// after each step.
// Returns -1 if no text action was undone; otherwise the position of the
// last undone text action, advanced by the action's length when that action
// was a removal (so the result follows the re-inserted text).
// Does nothing when called re-entrantly or when the buffer is read-only.
854 int Document::Undo() {
855 int newPos = -1;
856 CheckReadOnly();
857 if (enteredModification == 0) {
858 enteredModification++;
859 if (!cb.IsReadOnly()) {
860 bool startSavePoint = cb.IsSavePoint();
861 bool multiLine = false;
862 int steps = cb.StartUndo();
863 //Platform::DebugPrintf("Steps=%d\n", steps);
864 for (int step = 0; step < steps; step++) {
865 const int prevLinesTotal = LinesTotal();
866 const Action &action = cb.GetUndoStep();
// "Before" notification: undoing a removal inserts text, a container action
// is passed through with its token, anything else deletes text.
867 if (action.at == removeAction) {
868 NotifyModified(DocModification(
869 SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
870 } else if (action.at == containerAction) {
871 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
872 dm.token = action.position;
873 NotifyModified(dm);
874 } else {
875 NotifyModified(DocModification(
876 SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
878 cb.PerformUndoStep();
879 int cellPosition = action.position;
880 if (action.at != containerAction) {
881 ModifiedAt(cellPosition);
882 newPos = cellPosition;
885 int modFlags = SC_PERFORMED_UNDO;
886 // With undo, an insertion action becomes a deletion notification
887 if (action.at == removeAction) {
888 newPos += action.lenData;
889 modFlags |= SC_MOD_INSERTTEXT;
890 } else if (action.at == insertAction) {
891 modFlags |= SC_MOD_DELETETEXT;
893 if (steps > 1)
894 modFlags |= SC_MULTISTEPUNDOREDO;
895 const int linesAdded = LinesTotal() - prevLinesTotal;
896 if (linesAdded != 0)
897 multiLine = true;
// The last step carries the end-of-group flag and, if any step in the group
// changed the line count, the multi-line flag.
898 if (step == steps - 1) {
899 modFlags |= SC_LASTSTEPINUNDOREDO;
900 if (multiLine)
901 modFlags |= SC_MULTILINEUNDOREDO;
903 NotifyModified(DocModification(modFlags, cellPosition, action.lenData,
904 linesAdded, action.data));
// Notify only if the save point state differs from what it was at entry.
907 bool endSavePoint = cb.IsSavePoint();
908 if (startSavePoint != endSavePoint)
909 NotifySavePoint(endSavePoint);
911 enteredModification--;
913 return newPos;
// Redo the steps reported by cb.StartRedo(), notifying watchers before and
// after each step.
// Returns -1 if no text action was redone; otherwise the position of the
// last redone text action, advanced by the action's length when that action
// was an insertion (so the result follows the inserted text).
// Does nothing when called re-entrantly or when the buffer is read-only.
916 int Document::Redo() {
917 int newPos = -1;
918 CheckReadOnly();
919 if (enteredModification == 0) {
920 enteredModification++;
921 if (!cb.IsReadOnly()) {
922 bool startSavePoint = cb.IsSavePoint();
923 bool multiLine = false;
924 int steps = cb.StartRedo();
925 for (int step = 0; step < steps; step++) {
926 const int prevLinesTotal = LinesTotal();
927 const Action &action = cb.GetRedoStep();
// "Before" notification: redoing an insertion inserts text, a container
// action is passed through with its token, anything else deletes text.
928 if (action.at == insertAction) {
929 NotifyModified(DocModification(
930 SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
931 } else if (action.at == containerAction) {
932 DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
933 dm.token = action.position;
934 NotifyModified(dm);
935 } else {
936 NotifyModified(DocModification(
937 SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
939 cb.PerformRedoStep();
940 if (action.at != containerAction) {
941 ModifiedAt(action.position);
942 newPos = action.position;
945 int modFlags = SC_PERFORMED_REDO;
946 if (action.at == insertAction) {
947 newPos += action.lenData;
948 modFlags |= SC_MOD_INSERTTEXT;
949 } else if (action.at == removeAction) {
950 modFlags |= SC_MOD_DELETETEXT;
952 if (steps > 1)
953 modFlags |= SC_MULTISTEPUNDOREDO;
954 const int linesAdded = LinesTotal() - prevLinesTotal;
955 if (linesAdded != 0)
956 multiLine = true;
// The last step carries the end-of-group flag and, if any step in the group
// changed the line count, the multi-line flag.
957 if (step == steps - 1) {
958 modFlags |= SC_LASTSTEPINUNDOREDO;
959 if (multiLine)
960 modFlags |= SC_MULTILINEUNDOREDO;
962 NotifyModified(
963 DocModification(modFlags, action.position, action.lenData,
964 linesAdded, action.data));
// Notify only if the save point state differs from what it was at entry.
967 bool endSavePoint = cb.IsSavePoint();
968 if (startSavePoint != endSavePoint)
969 NotifySavePoint(endSavePoint);
971 enteredModification--;
973 return newPos;
977 * Insert a single character.
979 bool Document::InsertChar(int pos, char ch) {
980 char chs[1];
981 chs[0] = ch;
982 return InsertString(pos, chs, 1);
986 * Insert a null terminated string.
988 bool Document::InsertCString(int position, const char *s) {
989 return InsertString(position, s, strlen(s));
992 void Document::ChangeChar(int pos, char ch) {
993 DeleteChars(pos, 1);
994 InsertChar(pos, ch);
997 void Document::DelChar(int pos) {
998 DeleteChars(pos, LenChar(pos));
1001 void Document::DelCharBack(int pos) {
1002 if (pos <= 0) {
1003 return;
1004 } else if (IsCrLf(pos - 2)) {
1005 DeleteChars(pos - 2, 2);
1006 } else if (dbcsCodePage) {
1007 int startChar = NextPosition(pos, -1);
1008 DeleteChars(startChar, pos - startChar);
1009 } else {
1010 DeleteChars(pos - 1, 1);
// True for the two characters that can make up indentation: space and tab.
static bool isindentchar(char ch) {
	if (ch == ' ')
		return true;
	return ch == '\t';
}
// Column of the first tab stop strictly after pos, with tab stops every
// tabSize columns. tabSize must not be 0.
static int NextTab(int pos, int tabSize) {
	// pos - pos % tabSize is pos rounded towards zero to a multiple of tabSize.
	const int previousStop = pos - (pos % tabSize);
	return previousStop + tabSize;
}
// Write into linebuf a NUL terminated run of white space that is "indent"
// columns wide: as many tabs as fit (unless insertSpaces is set) followed
// by spaces for the remainder.
// length is the size of linebuf; one byte is kept back for the NUL and the
// output is silently cut short if the buffer is too small.
static void CreateIndentation(char *linebuf, int length, int indent, int tabSize, bool insertSpaces) {
	int room = length - 1;	// ensure space for \0
	char *out = linebuf;
	if (!insertSpaces) {
		for (; (indent >= tabSize) && (room > 0); indent -= tabSize, room--) {
			*out++ = '\t';
		}
	}
	for (; (indent > 0) && (room > 0); indent--, room--) {
		*out++ = ' ';
	}
	*out = '\0';
}
1039 int SCI_METHOD Document::GetLineIndentation(int line) {
1040 int indent = 0;
1041 if ((line >= 0) && (line < LinesTotal())) {
1042 int lineStart = LineStart(line);
1043 int length = Length();
1044 for (int i = lineStart; i < length; i++) {
1045 char ch = cb.CharAt(i);
1046 if (ch == ' ')
1047 indent++;
1048 else if (ch == '\t')
1049 indent = NextTab(indent, tabInChars);
1050 else
1051 return indent;
1054 return indent;
1057 void Document::SetLineIndentation(int line, int indent) {
1058 int indentOfLine = GetLineIndentation(line);
1059 if (indent < 0)
1060 indent = 0;
1061 if (indent != indentOfLine) {
1062 char linebuf[1000];
1063 CreateIndentation(linebuf, sizeof(linebuf), indent, tabInChars, !useTabs);
1064 int thisLineStart = LineStart(line);
1065 int indentPos = GetLineIndentPosition(line);
1066 UndoGroup ug(this);
1067 DeleteChars(thisLineStart, indentPos - thisLineStart);
1068 InsertCString(thisLineStart, linebuf);
1072 int Document::GetLineIndentPosition(int line) const {
1073 if (line < 0)
1074 return 0;
1075 int pos = LineStart(line);
1076 int length = Length();
1077 while ((pos < length) && isindentchar(cb.CharAt(pos))) {
1078 pos++;
1080 return pos;
1083 int Document::GetColumn(int pos) {
1084 int column = 0;
1085 int line = LineFromPosition(pos);
1086 if ((line >= 0) && (line < LinesTotal())) {
1087 for (int i = LineStart(line); i < pos;) {
1088 char ch = cb.CharAt(i);
1089 if (ch == '\t') {
1090 column = NextTab(column, tabInChars);
1091 i++;
1092 } else if (ch == '\r') {
1093 return column;
1094 } else if (ch == '\n') {
1095 return column;
1096 } else if (i >= Length()) {
1097 return column;
1098 } else {
1099 column++;
1100 i = NextPosition(i, 1);
1104 return column;
1107 int Document::FindColumn(int line, int column) {
1108 int position = LineStart(line);
1109 if ((line >= 0) && (line < LinesTotal())) {
1110 int columnCurrent = 0;
1111 while ((columnCurrent < column) && (position < Length())) {
1112 char ch = cb.CharAt(position);
1113 if (ch == '\t') {
1114 columnCurrent = NextTab(columnCurrent, tabInChars);
1115 position++;
1116 } else if (ch == '\r') {
1117 return position;
1118 } else if (ch == '\n') {
1119 return position;
1120 } else {
1121 columnCurrent++;
1122 position = NextPosition(position, 1);
1126 return position;
1129 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1130 // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1131 for (int line = lineBottom; line >= lineTop; line--) {
1132 int indentOfLine = GetLineIndentation(line);
1133 if (forwards) {
1134 if (LineStart(line) < LineEnd(line)) {
1135 SetLineIndentation(line, indentOfLine + IndentSize());
1137 } else {
1138 SetLineIndentation(line, indentOfLine - IndentSize());
1143 // Convert line endings for a piece of text to a particular mode.
1144 // Stop at len or when a NUL is found.
1145 // Caller must delete the returned pointer.
1146 char *Document::TransformLineEnds(int *pLenOut, const char *s, size_t len, int eolMode) {
1147 char *dest = new char[2 * len + 1];
1148 const char *sptr = s;
1149 char *dptr = dest;
1150 for (size_t i = 0; (i < len) && (*sptr != '\0'); i++) {
1151 if (*sptr == '\n' || *sptr == '\r') {
1152 if (eolMode == SC_EOL_CR) {
1153 *dptr++ = '\r';
1154 } else if (eolMode == SC_EOL_LF) {
1155 *dptr++ = '\n';
1156 } else { // eolMode == SC_EOL_CRLF
1157 *dptr++ = '\r';
1158 *dptr++ = '\n';
1160 if ((*sptr == '\r') && (i+1 < len) && (*(sptr+1) == '\n')) {
1161 i++;
1162 sptr++;
1164 sptr++;
1165 } else {
1166 *dptr++ = *sptr++;
1169 *dptr++ = '\0';
1170 *pLenOut = (dptr - dest) - 1;
1171 return dest;
1174 void Document::ConvertLineEnds(int eolModeSet) {
1175 UndoGroup ug(this);
1177 for (int pos = 0; pos < Length(); pos++) {
1178 if (cb.CharAt(pos) == '\r') {
1179 if (cb.CharAt(pos + 1) == '\n') {
1180 // CRLF
1181 if (eolModeSet == SC_EOL_CR) {
1182 DeleteChars(pos + 1, 1); // Delete the LF
1183 } else if (eolModeSet == SC_EOL_LF) {
1184 DeleteChars(pos, 1); // Delete the CR
1185 } else {
1186 pos++;
1188 } else {
1189 // CR
1190 if (eolModeSet == SC_EOL_CRLF) {
1191 InsertString(pos + 1, "\n", 1); // Insert LF
1192 pos++;
1193 } else if (eolModeSet == SC_EOL_LF) {
1194 InsertString(pos, "\n", 1); // Insert LF
1195 DeleteChars(pos + 1, 1); // Delete CR
1198 } else if (cb.CharAt(pos) == '\n') {
1199 // LF
1200 if (eolModeSet == SC_EOL_CRLF) {
1201 InsertString(pos, "\r", 1); // Insert CR
1202 pos++;
1203 } else if (eolModeSet == SC_EOL_CR) {
1204 InsertString(pos, "\r", 1); // Insert CR
1205 DeleteChars(pos + 1, 1); // Delete LF
1212 bool Document::IsWhiteLine(int line) const {
1213 int currentChar = LineStart(line);
1214 int endLine = LineEnd(line);
1215 while (currentChar < endLine) {
1216 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1217 return false;
1219 ++currentChar;
1221 return true;
1224 int Document::ParaUp(int pos) {
1225 int line = LineFromPosition(pos);
1226 line--;
1227 while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1228 line--;
1230 while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1231 line--;
1233 line++;
1234 return LineStart(line);
1237 int Document::ParaDown(int pos) {
1238 int line = LineFromPosition(pos);
1239 while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1240 line++;
1242 while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1243 line++;
1245 if (line < LinesTotal())
1246 return LineStart(line);
1247 else // end of a document
1248 return LineEnd(line-1);
1251 CharClassify::cc Document::WordCharClass(unsigned char ch) {
1252 if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80))
1253 return CharClassify::ccWord;
1254 return charClass.GetClass(ch);
1258 * Used by commmands that want to select whole words.
1259 * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1261 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1262 CharClassify::cc ccStart = CharClassify::ccWord;
1263 if (delta < 0) {
1264 if (!onlyWordCharacters)
1265 ccStart = WordCharClass(cb.CharAt(pos-1));
1266 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1267 pos--;
1268 } else {
1269 if (!onlyWordCharacters && pos < Length())
1270 ccStart = WordCharClass(cb.CharAt(pos));
1271 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1272 pos++;
1274 return MovePositionOutsideChar(pos, delta, true);
1278 * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1279 * (delta < 0).
1280 * This is looking for a transition between character classes although there is also some
1281 * additional movement to transit white space.
1282 * Used by cursor movement by word commands.
1284 int Document::NextWordStart(int pos, int delta) {
1285 if (delta < 0) {
1286 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1287 pos--;
1288 if (pos > 0) {
1289 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1290 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1291 pos--;
1294 } else {
1295 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1296 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1297 pos++;
1298 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1299 pos++;
1301 return pos;
1305 * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1306 * (delta < 0).
1307 * This is looking for a transition between character classes although there is also some
1308 * additional movement to transit white space.
1309 * Used by cursor movement by word commands.
1311 int Document::NextWordEnd(int pos, int delta) {
1312 if (delta < 0) {
1313 if (pos > 0) {
1314 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1315 if (ccStart != CharClassify::ccSpace) {
1316 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1317 pos--;
1320 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1321 pos--;
1324 } else {
1325 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1326 pos++;
1328 if (pos < Length()) {
1329 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1330 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1331 pos++;
1335 return pos;
1339 * Check that the character at the given position is a word or punctuation character and that
1340 * the previous character is of a different character class.
1342 bool Document::IsWordStartAt(int pos) {
1343 if (pos > 0) {
1344 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1345 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1346 (ccPos != WordCharClass(CharAt(pos - 1)));
1348 return true;
1352 * Check that the character at the given position is a word or punctuation character and that
1353 * the next character is of a different character class.
1355 bool Document::IsWordEndAt(int pos) {
1356 if (pos < Length()) {
1357 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1358 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1359 (ccPrev != WordCharClass(CharAt(pos)));
1361 return true;
1365 * Check that the given range is has transitions between character classes at both
1366 * ends and where the characters on the inside are word or punctuation characters.
1368 bool Document::IsWordAt(int start, int end) {
1369 return IsWordStartAt(start) && IsWordEndAt(end);
// ASCII-only lower casing: 'A'..'Z' map to 'a'..'z', everything else is
// returned unchanged.
static inline char MakeLowerCase(char ch) {
	const bool isUpper = (ch >= 'A') && (ch <= 'Z');
	return isUpper ? static_cast<char>(ch - 'A' + 'a') : ch;
}
// True when v lies in the range 0x80..0xBF, the range used for UTF-8
// continuation bytes.
static bool GoodTrailByte(int v) {
	if (v < 0x80)
		return false;
	return v < 0xc0;
}
1383 size_t Document::ExtractChar(int pos, char *bytes) {
1384 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
1385 size_t widthChar = UTF8CharLength(ch);
1386 bytes[0] = ch;
1387 for (size_t i=1; i<widthChar; i++) {
1388 bytes[i] = cb.CharAt(pos+i);
1389 if (!GoodTrailByte(static_cast<unsigned char>(bytes[i]))) { // Bad byte
1390 widthChar = 1;
1393 return widthChar;
1396 CaseFolderTable::CaseFolderTable() {
1397 for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1398 mapping[iChar] = static_cast<char>(iChar);
1402 CaseFolderTable::~CaseFolderTable() {
1405 size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
1406 if (lenMixed > sizeFolded) {
1407 return 0;
1408 } else {
1409 for (size_t i=0; i<lenMixed; i++) {
1410 folded[i] = mapping[static_cast<unsigned char>(mixed[i])];
1412 return lenMixed;
1416 void CaseFolderTable::SetTranslation(char ch, char chTranslation) {
1417 mapping[static_cast<unsigned char>(ch)] = chTranslation;
1420 void CaseFolderTable::StandardASCII() {
1421 for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1422 if (iChar >= 'A' && iChar <= 'Z') {
1423 mapping[iChar] = static_cast<char>(iChar - 'A' + 'a');
1424 } else {
1425 mapping[iChar] = static_cast<char>(iChar);
1430 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) {
1431 return (!word && !wordStart) ||
1432 (word && IsWordAt(pos, pos + length)) ||
1433 (wordStart && IsWordStartAt(pos));
1437 * Find text in document, supporting both forward and backward
1438 * searches (just pass minPos > maxPos to do a backward search)
1439 * Has not been tested with backwards DBCS searches yet.
1441 long Document::FindText(int minPos, int maxPos, const char *search,
1442 bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1443 int *length, CaseFolder *pcf) {
1444 if (*length <= 0)
1445 return minPos;
1446 if (regExp) {
1447 if (!regex)
1448 regex = CreateRegexSearch(&charClass);
1449 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1450 } else {
1452 const bool forward = minPos <= maxPos;
1453 const int increment = forward ? 1 : -1;
1455 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1456 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1457 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1459 // Compute actual search ranges needed
1460 const int lengthFind = (*length == -1) ? static_cast<int>(strlen(search)) : *length;
1461 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1463 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1464 const int limitPos = Platform::Maximum(startPos, endPos);
1465 int pos = startPos;
1466 if (!forward) {
1467 // Back all of a character
1468 pos = NextPosition(pos, increment);
1470 if (caseSensitive) {
1471 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1472 bool found = (pos + lengthFind) <= limitPos;
1473 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1474 found = CharAt(pos + indexSearch) == search[indexSearch];
1476 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1477 return pos;
1479 if (!NextCharacter(pos, increment))
1480 break;
1482 } else if (SC_CP_UTF8 == dbcsCodePage) {
1483 const size_t maxBytesCharacter = 4;
1484 const size_t maxFoldingExpansion = 4;
1485 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1486 const int lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1487 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1488 int widthFirstCharacter = 0;
1489 int indexDocument = 0;
1490 int indexSearch = 0;
1491 bool characterMatches = true;
1492 while (characterMatches &&
1493 ((pos + indexDocument) < limitPos) &&
1494 (indexSearch < lenSearch)) {
1495 char bytes[maxBytesCharacter + 1];
1496 bytes[maxBytesCharacter] = 0;
1497 const int widthChar = ExtractChar(pos + indexDocument, bytes);
1498 if (!widthFirstCharacter)
1499 widthFirstCharacter = widthChar;
1500 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1501 const int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
1502 folded[lenFlat] = 0;
1503 // Does folded match the buffer
1504 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1505 indexDocument += widthChar;
1506 indexSearch += lenFlat;
1508 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1509 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1510 *length = indexDocument;
1511 return pos;
1514 if (forward) {
1515 pos += widthFirstCharacter;
1516 } else {
1517 if (!NextCharacter(pos, increment))
1518 break;
1521 } else if (dbcsCodePage) {
1522 const size_t maxBytesCharacter = 2;
1523 const size_t maxFoldingExpansion = 4;
1524 std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1525 const int lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1526 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1527 int indexDocument = 0;
1528 int indexSearch = 0;
1529 bool characterMatches = true;
1530 while (characterMatches &&
1531 ((pos + indexDocument) < limitPos) &&
1532 (indexSearch < lenSearch)) {
1533 char bytes[maxBytesCharacter + 1];
1534 bytes[0] = cb.CharAt(pos + indexDocument);
1535 const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1536 if (widthChar == 2)
1537 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1538 char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1539 const int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
1540 folded[lenFlat] = 0;
1541 // Does folded match the buffer
1542 characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1543 indexDocument += widthChar;
1544 indexSearch += lenFlat;
1546 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1547 if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1548 *length = indexDocument;
1549 return pos;
1552 if (!NextCharacter(pos, increment))
1553 break;
1555 } else {
1556 CaseFolderTable caseFolder;
1557 std::vector<char> searchThing(lengthFind + 1);
1558 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1559 while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1560 bool found = (pos + lengthFind) <= limitPos;
1561 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1562 char ch = CharAt(pos + indexSearch);
1563 char folded[2];
1564 pcf->Fold(folded, sizeof(folded), &ch, 1);
1565 found = folded[0] == searchThing[indexSearch];
1567 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1568 return pos;
1570 if (!NextCharacter(pos, increment))
1571 break;
1575 //Platform::DebugPrintf("Not found\n");
1576 return -1;
1579 const char *Document::SubstituteByPosition(const char *text, int *length) {
1580 if (regex)
1581 return regex->SubstituteByPosition(this, text, length);
1582 else
1583 return 0;
1586 int Document::LinesTotal() const {
1587 return cb.Lines();
1590 void Document::ChangeCase(Range r, bool makeUpperCase) {
1591 for (int pos = r.start; pos < r.end;) {
1592 int len = LenChar(pos);
1593 if (len == 1) {
1594 char ch = CharAt(pos);
1595 if (makeUpperCase) {
1596 if (IsLowerCase(ch)) {
1597 ChangeChar(pos, static_cast<char>(MakeUpperCase(ch)));
1599 } else {
1600 if (IsUpperCase(ch)) {
1601 ChangeChar(pos, static_cast<char>(MakeLowerCase(ch)));
1605 pos += len;
1609 void Document::SetDefaultCharClasses(bool includeWordClass) {
1610 charClass.SetDefaultCharClasses(includeWordClass);
1613 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1614 charClass.SetCharClasses(chars, newCharClass);
1617 void Document::SetStylingBits(int bits) {
1618 stylingBits = bits;
1619 stylingBitsMask = (1 << stylingBits) - 1;
1622 void SCI_METHOD Document::StartStyling(int position, char mask) {
1623 stylingMask = mask;
1624 endStyled = position;
1627 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1628 if (enteredStyling != 0) {
1629 return false;
1630 } else {
1631 enteredStyling++;
1632 style &= stylingMask;
1633 int prevEndStyled = endStyled;
1634 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1635 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1636 prevEndStyled, length);
1637 NotifyModified(mh);
1639 endStyled += length;
1640 enteredStyling--;
1641 return true;
1645 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1646 if (enteredStyling != 0) {
1647 return false;
1648 } else {
1649 enteredStyling++;
1650 bool didChange = false;
1651 int startMod = 0;
1652 int endMod = 0;
1653 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1654 PLATFORM_ASSERT(endStyled < Length());
1655 if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1656 if (!didChange) {
1657 startMod = endStyled;
1659 didChange = true;
1660 endMod = endStyled;
1663 if (didChange) {
1664 DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1665 startMod, endMod - startMod + 1);
1666 NotifyModified(mh);
1668 enteredStyling--;
1669 return true;
1673 void Document::EnsureStyledTo(int pos) {
1674 if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1675 IncrementStyleClock();
1676 if (pli && !pli->UseContainerLexing()) {
1677 int lineEndStyled = LineFromPosition(GetEndStyled());
1678 int endStyledTo = LineStart(lineEndStyled);
1679 pli->Colourise(endStyledTo, pos);
1680 } else {
1681 // Ask the watchers to style, and stop as soon as one responds.
1682 for (int i = 0; pos > GetEndStyled() && i < lenWatchers; i++) {
1683 watchers[i].watcher->NotifyStyleNeeded(this, watchers[i].userData, pos);
1689 void Document::LexerChanged() {
1690 // Tell the watchers the lexer has changed.
1691 for (int i = 0; i < lenWatchers; i++) {
1692 watchers[i].watcher->NotifyLexerChanged(this, watchers[i].userData);
1696 int SCI_METHOD Document::SetLineState(int line, int state) {
1697 int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1698 if (state != statePrevious) {
1699 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1700 NotifyModified(mh);
1702 return statePrevious;
1705 int SCI_METHOD Document::GetLineState(int line) const {
1706 return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1709 int Document::GetMaxLineState() {
1710 return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1713 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1714 DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1715 NotifyModified(mh);
1718 StyledText Document::MarginStyledText(int line) {
1719 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1720 return StyledText(pla->Length(line), pla->Text(line),
1721 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1724 void Document::MarginSetText(int line, const char *text) {
1725 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1726 DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1727 NotifyModified(mh);
1730 void Document::MarginSetStyle(int line, int style) {
1731 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1734 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1735 static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1738 int Document::MarginLength(int line) const {
1739 return static_cast<LineAnnotation *>(perLineData[ldMargin])->Length(line);
1742 void Document::MarginClearAll() {
1743 int maxEditorLine = LinesTotal();
1744 for (int l=0; l<maxEditorLine; l++)
1745 MarginSetText(l, 0);
1746 // Free remaining data
1747 static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1750 bool Document::AnnotationAny() const {
1751 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->AnySet();
1754 StyledText Document::AnnotationStyledText(int line) {
1755 LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1756 return StyledText(pla->Length(line), pla->Text(line),
1757 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1760 void Document::AnnotationSetText(int line, const char *text) {
1761 const int linesBefore = AnnotationLines(line);
1762 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1763 const int linesAfter = AnnotationLines(line);
1764 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1765 mh.annotationLinesAdded = linesAfter - linesBefore;
1766 NotifyModified(mh);
1769 void Document::AnnotationSetStyle(int line, int style) {
1770 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1771 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1772 NotifyModified(mh);
1775 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1776 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1779 int Document::AnnotationLength(int line) const {
1780 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Length(line);
1783 int Document::AnnotationLines(int line) const {
1784 return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1787 void Document::AnnotationClearAll() {
1788 int maxEditorLine = LinesTotal();
1789 for (int l=0; l<maxEditorLine; l++)
1790 AnnotationSetText(l, 0);
1791 // Free remaining data
1792 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1795 void Document::IncrementStyleClock() {
1796 styleClock = (styleClock + 1) % 0x100000;
1799 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1800 if (decorations.FillRange(position, value, fillLength)) {
1801 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1802 position, fillLength);
1803 NotifyModified(mh);
1807 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1808 for (int i = 0; i < lenWatchers; i++) {
1809 if ((watchers[i].watcher == watcher) &&
1810 (watchers[i].userData == userData))
1811 return false;
1813 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers + 1];
1814 for (int j = 0; j < lenWatchers; j++)
1815 pwNew[j] = watchers[j];
1816 pwNew[lenWatchers].watcher = watcher;
1817 pwNew[lenWatchers].userData = userData;
1818 delete []watchers;
1819 watchers = pwNew;
1820 lenWatchers++;
1821 return true;
1824 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1825 for (int i = 0; i < lenWatchers; i++) {
1826 if ((watchers[i].watcher == watcher) &&
1827 (watchers[i].userData == userData)) {
1828 if (lenWatchers == 1) {
1829 delete []watchers;
1830 watchers = 0;
1831 lenWatchers = 0;
1832 } else {
1833 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers];
1834 for (int j = 0; j < lenWatchers - 1; j++) {
1835 pwNew[j] = (j < i) ? watchers[j] : watchers[j + 1];
1837 delete []watchers;
1838 watchers = pwNew;
1839 lenWatchers--;
1841 return true;
1844 return false;
1847 void Document::NotifyModifyAttempt() {
1848 for (int i = 0; i < lenWatchers; i++) {
1849 watchers[i].watcher->NotifyModifyAttempt(this, watchers[i].userData);
1853 void Document::NotifySavePoint(bool atSavePoint) {
1854 for (int i = 0; i < lenWatchers; i++) {
1855 watchers[i].watcher->NotifySavePoint(this, watchers[i].userData, atSavePoint);
1859 void Document::NotifyModified(DocModification mh) {
1860 if (mh.modificationType & SC_MOD_INSERTTEXT) {
1861 decorations.InsertSpace(mh.position, mh.length);
1862 } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1863 decorations.DeleteRange(mh.position, mh.length);
1865 for (int i = 0; i < lenWatchers; i++) {
1866 watchers[i].watcher->NotifyModified(this, mh, watchers[i].userData);
1870 bool Document::IsWordPartSeparator(char ch) {
1871 return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1874 int Document::WordPartLeft(int pos) {
1875 if (pos > 0) {
1876 --pos;
1877 char startChar = cb.CharAt(pos);
1878 if (IsWordPartSeparator(startChar)) {
1879 while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1880 --pos;
1883 if (pos > 0) {
1884 startChar = cb.CharAt(pos);
1885 --pos;
1886 if (IsLowerCase(startChar)) {
1887 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1888 --pos;
1889 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1890 ++pos;
1891 } else if (IsUpperCase(startChar)) {
1892 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1893 --pos;
1894 if (!IsUpperCase(cb.CharAt(pos)))
1895 ++pos;
1896 } else if (IsADigit(startChar)) {
1897 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1898 --pos;
1899 if (!IsADigit(cb.CharAt(pos)))
1900 ++pos;
1901 } else if (IsPunctuation(startChar)) {
1902 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1903 --pos;
1904 if (!IsPunctuation(cb.CharAt(pos)))
1905 ++pos;
1906 } else if (isspacechar(startChar)) {
1907 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1908 --pos;
1909 if (!isspacechar(cb.CharAt(pos)))
1910 ++pos;
1911 } else if (!isascii(startChar)) {
1912 while (pos > 0 && !isascii(cb.CharAt(pos)))
1913 --pos;
1914 if (isascii(cb.CharAt(pos)))
1915 ++pos;
1916 } else {
1917 ++pos;
1921 return pos;
1924 int Document::WordPartRight(int pos) {
1925 char startChar = cb.CharAt(pos);
1926 int length = Length();
1927 if (IsWordPartSeparator(startChar)) {
1928 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1929 ++pos;
1930 startChar = cb.CharAt(pos);
1932 if (!isascii(startChar)) {
1933 while (pos < length && !isascii(cb.CharAt(pos)))
1934 ++pos;
1935 } else if (IsLowerCase(startChar)) {
1936 while (pos < length && IsLowerCase(cb.CharAt(pos)))
1937 ++pos;
1938 } else if (IsUpperCase(startChar)) {
1939 if (IsLowerCase(cb.CharAt(pos + 1))) {
1940 ++pos;
1941 while (pos < length && IsLowerCase(cb.CharAt(pos)))
1942 ++pos;
1943 } else {
1944 while (pos < length && IsUpperCase(cb.CharAt(pos)))
1945 ++pos;
1947 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
1948 --pos;
1949 } else if (IsADigit(startChar)) {
1950 while (pos < length && IsADigit(cb.CharAt(pos)))
1951 ++pos;
1952 } else if (IsPunctuation(startChar)) {
1953 while (pos < length && IsPunctuation(cb.CharAt(pos)))
1954 ++pos;
1955 } else if (isspacechar(startChar)) {
1956 while (pos < length && isspacechar(cb.CharAt(pos)))
1957 ++pos;
1958 } else {
1959 ++pos;
1961 return pos;
// True for the two line end characters, line feed and carriage return.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
1968 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
1969 int sStart = cb.StyleAt(pos);
1970 if (delta < 0) {
1971 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1972 pos--;
1973 pos++;
1974 } else {
1975 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1976 pos++;
1978 return pos;
// Return the brace that pairs with ch for the pairs (), [], {} and <>,
// or '\0' when ch is not a brace.
static char BraceOpposite(char ch) {
	// Opening and closing brace of each pair stored next to each other.
	static const char pairs[] = "()[]{}<>";
	for (int i = 0; pairs[i] != '\0'; i += 2) {
		if (ch == pairs[i])
			return pairs[i + 1];
		if (ch == pairs[i + 1])
			return pairs[i];
	}
	return '\0';
}
2004 // TODO: should be able to extend styled region to find matching brace
2005 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2006 char chBrace = CharAt(position);
2007 char chSeek = BraceOpposite(chBrace);
2008 if (chSeek == '\0')
2009 return - 1;
2010 char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
2011 int direction = -1;
2012 if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2013 direction = 1;
2014 int depth = 1;
2015 position = NextPosition(position, direction);
2016 while ((position >= 0) && (position < Length())) {
2017 char chAtPos = CharAt(position);
2018 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
2019 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2020 if (chAtPos == chBrace)
2021 depth++;
2022 if (chAtPos == chSeek)
2023 depth--;
2024 if (depth == 0)
2025 return position;
2027 int positionBeforeMove = position;
2028 position = NextPosition(position, direction);
2029 if (position == positionBeforeMove)
2030 break;
2032 return - 1;
2036 * Implementation of RegexSearchBase for the default built-in regular expression engine
2038 class BuiltinRegex : public RegexSearchBase {
2039 public:
2040 BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {}
2042 virtual ~BuiltinRegex() {
2043 delete substituted;
2046 virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2047 bool caseSensitive, bool word, bool wordStart, int flags,
2048 int *length);
2050 virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2052 private:
2053 RESearch search;
2054 char *substituted;
2057 // Define a way for the Regular Expression code to access the document
2058 class DocumentIndexer : public CharacterIndexer {
2059 Document *pdoc;
2060 int end;
2061 public:
2062 DocumentIndexer(Document *pdoc_, int end_) :
2063 pdoc(pdoc_), end(end_) {
2066 virtual ~DocumentIndexer() {
2069 virtual char CharAt(int index) {
2070 if (index < 0 || index >= end)
2071 return 0;
2072 else
2073 return pdoc->CharAt(index);
2077 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2078 bool caseSensitive, bool, bool, int flags,
2079 int *length) {
2080 bool posix = (flags & SCFIND_POSIX) != 0;
2081 int increment = (minPos <= maxPos) ? 1 : -1;
2083 int startPos = minPos;
2084 int endPos = maxPos;
2086 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2087 startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2088 endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2090 const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2091 if (errmsg) {
2092 return -1;
2094 // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2095 // Replace first '.' with '-' in each property file variable reference:
2096 // Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2097 // Replace: $(\1-\2)
2098 int lineRangeStart = doc->LineFromPosition(startPos);
2099 int lineRangeEnd = doc->LineFromPosition(endPos);
2100 if ((increment == 1) &&
2101 (startPos >= doc->LineEnd(lineRangeStart)) &&
2102 (lineRangeStart < lineRangeEnd)) {
2103 // the start position is at end of line or between line end characters.
2104 lineRangeStart++;
2105 startPos = doc->LineStart(lineRangeStart);
2106 } else if ((increment == -1) &&
2107 (startPos <= doc->LineStart(lineRangeStart)) &&
2108 (lineRangeStart > lineRangeEnd)) {
2109 // the start position is at beginning of line.
2110 lineRangeStart--;
2111 startPos = doc->LineEnd(lineRangeStart);
2113 int pos = -1;
2114 int lenRet = 0;
2115 char searchEnd = s[*length - 1];
2116 int lineRangeBreak = lineRangeEnd + increment;
2117 for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2118 int startOfLine = doc->LineStart(line);
2119 int endOfLine = doc->LineEnd(line);
2120 if (increment == 1) {
2121 if (line == lineRangeStart) {
2122 if ((startPos != startOfLine) && (s[0] == '^'))
2123 continue; // Can't match start of line if start position after start of line
2124 startOfLine = startPos;
2126 if (line == lineRangeEnd) {
2127 if ((endPos != endOfLine) && (searchEnd == '$'))
2128 continue; // Can't match end of line if end position before end of line
2129 endOfLine = endPos;
2131 } else {
2132 if (line == lineRangeEnd) {
2133 if ((endPos != startOfLine) && (s[0] == '^'))
2134 continue; // Can't match start of line if end position after start of line
2135 startOfLine = endPos;
2137 if (line == lineRangeStart) {
2138 if ((startPos != endOfLine) && (searchEnd == '$'))
2139 continue; // Can't match end of line if start position before end of line
2140 endOfLine = startPos;
2144 DocumentIndexer di(doc, endOfLine);
2145 int success = search.Execute(di, startOfLine, endOfLine);
2146 if (success) {
2147 pos = search.bopat[0];
2148 lenRet = search.eopat[0] - search.bopat[0];
2149 // There can be only one start of a line, so no need to look for last match in line
2150 if ((increment == -1) && (s[0] != '^')) {
2151 // Check for the last match on this line.
2152 int repetitions = 1000; // Break out of infinite loop
2153 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2154 success = search.Execute(di, pos+1, endOfLine);
2155 if (success) {
2156 if (search.eopat[0] <= minPos) {
2157 pos = search.bopat[0];
2158 lenRet = search.eopat[0] - search.bopat[0];
2159 } else {
2160 success = 0;
2165 break;
2168 *length = lenRet;
2169 return pos;
2172 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2173 delete []substituted;
2174 substituted = 0;
2175 DocumentIndexer di(doc, doc->Length());
2176 if (!search.GrabMatches(di))
2177 return 0;
2178 unsigned int lenResult = 0;
2179 for (int i = 0; i < *length; i++) {
2180 if (text[i] == '\\') {
2181 if (text[i + 1] >= '1' && text[i + 1] <= '9') {
2182 unsigned int patNum = text[i + 1] - '0';
2183 lenResult += search.eopat[patNum] - search.bopat[patNum];
2184 i++;
2185 } else {
2186 switch (text[i + 1]) {
2187 case 'a':
2188 case 'b':
2189 case 'f':
2190 case 'n':
2191 case 'r':
2192 case 't':
2193 case 'v':
2194 case '\\':
2195 i++;
2197 lenResult++;
2199 } else {
2200 lenResult++;
2203 substituted = new char[lenResult + 1];
2204 char *o = substituted;
2205 for (int j = 0; j < *length; j++) {
2206 if (text[j] == '\\') {
2207 if (text[j + 1] >= '1' && text[j + 1] <= '9') {
2208 unsigned int patNum = text[j + 1] - '0';
2209 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2210 if (search.pat[patNum]) // Will be null if try for a match that did not occur
2211 memcpy(o, search.pat[patNum], len);
2212 o += len;
2213 j++;
2214 } else {
2215 j++;
2216 switch (text[j]) {
2217 case 'a':
2218 *o++ = '\a';
2219 break;
2220 case 'b':
2221 *o++ = '\b';
2222 break;
2223 case 'f':
2224 *o++ = '\f';
2225 break;
2226 case 'n':
2227 *o++ = '\n';
2228 break;
2229 case 'r':
2230 *o++ = '\r';
2231 break;
2232 case 't':
2233 *o++ = '\t';
2234 break;
2235 case 'v':
2236 *o++ = '\v';
2237 break;
2238 case '\\':
2239 *o++ = '\\';
2240 break;
2241 default:
2242 *o++ = '\\';
2243 j--;
2246 } else {
2247 *o++ = text[j];
2250 *o = '\0';
2251 *length = lenResult;
2252 return substituted;
2255 #ifndef SCI_OWNREGEX
2257 #ifdef SCI_NAMESPACE
2259 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2260 return new BuiltinRegex(charClassTable);
2263 #else
2265 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2266 return new BuiltinRegex(charClassTable);
2269 #endif
2271 #endif