scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <ctype.h>
  12 #include <assert.h>
  13
  14 #include <string>
  15 #include <vector>
  16 #include <algorithm>
  17
  18 #include "Platform.h"
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22
  23 #include "SplitVector.h"
  24 #include "Partitioning.h"
  25 #include "RunStyles.h"
  26 #include "CellBuffer.h"
  27 #include "PerLine.h"
  28 #include "CharClassify.h"
  29 #include "CharacterSet.h"
  30 #include "Decoration.h"
  31 #include "CaseFolder.h"
  32 #include "Document.h"
  33 #include "RESearch.h"
  34 #include "UniConversion.h"
  35
  36 #ifdef SCI_NAMESPACE
  37 using namespace Scintilla;
  38 #endif
  39
  40 static inline bool IsPunctuation(char ch) {
  41         return IsASCII(ch) && ispunct(ch);
  42 }
  43
  44 void LexInterface::Colourise(int start, int end) {
  45         if (pdoc && instance && !performingStyle) {
  46                 // Protect against reentrance, which may occur, for example, when
  47                 // fold points are discovered while performing styling and the folding
  48                 // code looks for child lines which may trigger styling.
  49                 performingStyle = true;
  50
  51                 int lengthDoc = pdoc->Length();
  52                 if (end == -1)
  53                         end = lengthDoc;
  54                 int len = end - start;
  55
  56                 PLATFORM_ASSERT(len >= 0);
  57                 PLATFORM_ASSERT(start + len <= lengthDoc);
  58
  59                 int styleStart = 0;
  60                 if (start > 0)
  61                         styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
  62
  63                 if (len > 0) {
  64                         instance->Lex(start, len, styleStart, pdoc);
  65                         instance->Fold(start, len, styleStart, pdoc);
  66                 }
  67
  68                 performingStyle = false;
  69         }
  70 }
  71
  72 int LexInterface::LineEndTypesSupported() {
  73         if (instance) {
  74                 int interfaceVersion = instance->Version();
  75                 if (interfaceVersion >= lvSubStyles) {
  76                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  77                         return ssinstance->LineEndTypesSupported();
  78                 }
  79         }
  80         return 0;
  81 }
  82
  83 Document::Document() {
  84         refCount = 0;
  85         pcf = NULL;
  86 #ifdef _WIN32
  87         eolMode = SC_EOL_CRLF;
  88 #else
  89         eolMode = SC_EOL_LF;
  90 #endif
  91         dbcsCodePage = 0;
  92         lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
  93         stylingBits = 5;
  94         stylingBitsMask = 0x1F;
  95         stylingMask = 0;
  96         endStyled = 0;
  97         styleClock = 0;
  98         enteredModification = 0;
  99         enteredStyling = 0;
 100         enteredReadOnlyCount = 0;
 101         tabInChars = 8;
 102         indentInChars = 0;
 103         actualIndentInChars = 8;
 104         useTabs = true;
 105         tabIndents = true;
 106         backspaceUnindents = false;
 107
 108         matchesValid = false;
 109         regex = 0;
 110
 111         UTF8BytesOfLeadInitialise();
 112
 113         perLineData[ldMarkers] = new LineMarkers();
 114         perLineData[ldLevels] = new LineLevels();
 115         perLineData[ldState] = new LineState();
 116         perLineData[ldMargin] = new LineAnnotation();
 117         perLineData[ldAnnotation] = new LineAnnotation();
 118
 119         cb.SetPerLine(this);
 120
 121         pli = 0;
 122 }
 123
 124 Document::~Document() {
 125         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 126                 it->watcher->NotifyDeleted(this, it->userData);
 127         }
 128         for (int j=0; j<ldSize; j++) {
 129                 delete perLineData[j];
 130                 perLineData[j] = 0;
 131         }
 132         delete regex;
 133         regex = 0;
 134         delete pli;
 135         pli = 0;
 136         delete pcf;
 137         pcf = 0;
 138 }
 139
 140 void Document::Init() {
 141         for (int j=0; j<ldSize; j++) {
 142                 if (perLineData[j])
 143                         perLineData[j]->Init();
 144         }
 145 }
 146
 147 int Document::LineEndTypesSupported() const {
 148         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 149                 return pli->LineEndTypesSupported();
 150         else
 151                 return 0;
 152 }
 153
 154 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 155         if (dbcsCodePage != dbcsCodePage_) {
 156                 dbcsCodePage = dbcsCodePage_;
 157                 SetCaseFolder(NULL);
 158                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 159                 return true;
 160         } else {
 161                 return false;
 162         }
 163 }
 164
 165 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 166         if (lineEndBitSet != lineEndBitSet_) {
 167                 lineEndBitSet = lineEndBitSet_;
 168                 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 169                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 170                         ModifiedAt(0);
 171                         cb.SetLineEndTypes(lineEndBitSetActive);
 172                         return true;
 173                 } else {
 174                         return false;
 175                 }
 176         } else {
 177                 return false;
 178         }
 179 }
 180
 181 void Document::InsertLine(int line) {
 182         for (int j=0; j<ldSize; j++) {
 183                 if (perLineData[j])
 184                         perLineData[j]->InsertLine(line);
 185         }
 186 }
 187
 188 void Document::RemoveLine(int line) {
 189         for (int j=0; j<ldSize; j++) {
 190                 if (perLineData[j])
 191                         perLineData[j]->RemoveLine(line);
 192         }
 193 }
 194
 195 // Increase reference count and return its previous value.
 196 int Document::AddRef() {
 197         return refCount++;
 198 }
 199
 200 // Decrease reference count and return its previous value.
 201 // Delete the document if reference count reaches zero.
 202 int SCI_METHOD Document::Release() {
 203         int curRefCount = --refCount;
 204         if (curRefCount == 0)
 205                 delete this;
 206         return curRefCount;
 207 }
 208
 209 void Document::SetSavePoint() {
 210         cb.SetSavePoint();
 211         NotifySavePoint(true);
 212 }
 213
 214 int Document::GetMark(int line) {
 215         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 216 }
 217
 218 int Document::MarkerNext(int lineStart, int mask) const {
 219         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 220 }
 221
 222 int Document::AddMark(int line, int markerNum) {
 223         if (line >= 0 && line <= LinesTotal()) {
 224                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 225                         AddMark(line, markerNum, LinesTotal());
 226                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 227                 NotifyModified(mh);
 228                 return prev;
 229         } else {
 230                 return 0;
 231         }
 232 }
 233
 234 void Document::AddMarkSet(int line, int valueSet) {
 235         if (line < 0 || line > LinesTotal()) {
 236                 return;
 237         }
 238         unsigned int m = valueSet;
 239         for (int i = 0; m; i++, m >>= 1)
 240                 if (m & 1)
 241                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 242                                 AddMark(line, i, LinesTotal());
 243         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 244         NotifyModified(mh);
 245 }
 246
 247 void Document::DeleteMark(int line, int markerNum) {
 248         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 249         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 250         NotifyModified(mh);
 251 }
 252
 253 void Document::DeleteMarkFromHandle(int markerHandle) {
 254         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 255         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 256         mh.line = -1;
 257         NotifyModified(mh);
 258 }
 259
 260 void Document::DeleteAllMarks(int markerNum) {
 261         bool someChanges = false;
 262         for (int line = 0; line < LinesTotal(); line++) {
 263                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 264                         someChanges = true;
 265         }
 266         if (someChanges) {
 267                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 268                 mh.line = -1;
 269                 NotifyModified(mh);
 270         }
 271 }
 272
 273 int Document::LineFromHandle(int markerHandle) {
 274         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 275 }
 276
 277 int SCI_METHOD Document::LineStart(int line) const {
 278         return cb.LineStart(line);
 279 }
 280
 281 int SCI_METHOD Document::LineEnd(int line) const {
 282         if (line >= LinesTotal() - 1) {
 283                 return LineStart(line + 1);
 284         } else {
 285                 int position = LineStart(line + 1);
 286                 if (SC_CP_UTF8 == dbcsCodePage) {
 287                         unsigned char bytes[] = {
 288                                 static_cast<unsigned char>(cb.CharAt(position-3)),
 289                                 static_cast<unsigned char>(cb.CharAt(position-2)),
 290                                 static_cast<unsigned char>(cb.CharAt(position-1)),
 291                         };
 292                         if (UTF8IsSeparator(bytes)) {
 293                                 return position - UTF8SeparatorLength;
 294                         }
 295                         if (UTF8IsNEL(bytes+1)) {
 296                                 return position - UTF8NELLength;
 297                         }
 298                 }
 299                 position--; // Back over CR or LF
 300                 // When line terminator is CR+LF, may need to go back one more
 301                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 302                         position--;
 303                 }
 304                 return position;
 305         }
 306 }
 307
 308 void SCI_METHOD Document::SetErrorStatus(int status) {
 309         // Tell the watchers an error has occurred.
 310         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 311                 it->watcher->NotifyErrorOccurred(this, it->userData, status);
 312         }
 313 }
 314
 315 int SCI_METHOD Document::LineFromPosition(int pos) const {
 316         return cb.LineFromPosition(pos);
 317 }
 318
 319 int Document::LineEndPosition(int position) const {
 320         return LineEnd(LineFromPosition(position));
 321 }
 322
 323 bool Document::IsLineEndPosition(int position) const {
 324         return LineEnd(LineFromPosition(position)) == position;
 325 }
 326
 327 bool Document::IsPositionInLineEnd(int position) const {
 328         return position >= LineEnd(LineFromPosition(position));
 329 }
 330
 331 int Document::VCHomePosition(int position) const {
 332         int line = LineFromPosition(position);
 333         int startPosition = LineStart(line);
 334         int endLine = LineEnd(line);
 335         int startText = startPosition;
 336         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 337                 startText++;
 338         if (position == startText)
 339                 return startPosition;
 340         else
 341                 return startText;
 342 }
 343
 344 int SCI_METHOD Document::SetLevel(int line, int level) {
 345         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 346         if (prev != level) {
 347                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 348                                    LineStart(line), 0, 0, 0, line);
 349                 mh.foldLevelNow = level;
 350                 mh.foldLevelPrev = prev;
 351                 NotifyModified(mh);
 352         }
 353         return prev;
 354 }
 355
 356 int SCI_METHOD Document::GetLevel(int line) const {
 357         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 358 }
 359
 360 void Document::ClearLevels() {
 361         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 362 }
 363
 364 static bool IsSubordinate(int levelStart, int levelTry) {
 365         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 366                 return true;
 367         else
 368                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 369 }
 370
 371 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 372         if (level == -1)
 373                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 374         int maxLine = LinesTotal();
 375         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 376         int lineMaxSubord = lineParent;
 377         while (lineMaxSubord < maxLine - 1) {
 378                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 379                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 380                         break;
 381                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 382                         break;
 383                 lineMaxSubord++;
 384         }
 385         if (lineMaxSubord > lineParent) {
 386                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 387                         // Have chewed up some whitespace that belongs to a parent so seek back
 388                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 389                                 lineMaxSubord--;
 390                         }
 391                 }
 392         }
 393         return lineMaxSubord;
 394 }
 395
 396 int Document::GetFoldParent(int line) const {
 397         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 398         int lineLook = line - 1;
 399         while ((lineLook > 0) && (
 400                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 401                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 402               ) {
 403                 lineLook--;
 404         }
 405         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 406                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 407                 return lineLook;
 408         } else {
 409                 return -1;
 410         }
 411 }
 412
 413 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
 414         int level = GetLevel(line);
 415         int lookLastLine = Platform::Maximum(line, lastLine) + 1;
 416
 417         int lookLine = line;
 418         int lookLineLevel = level;
 419         int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 420         while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
 421                 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
 422                 lookLineLevel = GetLevel(--lookLine);
 423                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 424         }
 425
 426         int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
 427         if (beginFoldBlock == -1) {
 428                 highlightDelimiter.Clear();
 429                 return;
 430         }
 431
 432         int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
 433         int firstChangeableLineBefore = -1;
 434         if (endFoldBlock < line) {
 435                 lookLine = beginFoldBlock - 1;
 436                 lookLineLevel = GetLevel(lookLine);
 437                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 438                 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
 439                         if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
 440                                 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
 441                                         beginFoldBlock = lookLine;
 442                                         endFoldBlock = line;
 443                                         firstChangeableLineBefore = line - 1;
 444                                 }
 445                         }
 446                         if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
 447                                 break;
 448                         lookLineLevel = GetLevel(--lookLine);
 449                         lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 450                 }
 451         }
 452         if (firstChangeableLineBefore == -1) {
 453                 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 454                         lookLine >= beginFoldBlock;
 455                         lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 456                         if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
 457                                 firstChangeableLineBefore = lookLine;
 458                                 break;
 459                         }
 460                 }
 461         }
 462         if (firstChangeableLineBefore == -1)
 463                 firstChangeableLineBefore = beginFoldBlock - 1;
 464
 465         int firstChangeableLineAfter = -1;
 466         for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 467                 lookLine <= endFoldBlock;
 468                 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 469                 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
 470                         firstChangeableLineAfter = lookLine;
 471                         break;
 472                 }
 473         }
 474         if (firstChangeableLineAfter == -1)
 475                 firstChangeableLineAfter = endFoldBlock + 1;
 476
 477         highlightDelimiter.beginFoldBlock = beginFoldBlock;
 478         highlightDelimiter.endFoldBlock = endFoldBlock;
 479         highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
 480         highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
 481 }
 482
 483 int Document::ClampPositionIntoDocument(int pos) const {
 484         return Platform::Clamp(pos, 0, Length());
 485 }
 486
 487 bool Document::IsCrLf(int pos) const {
 488         if (pos < 0)
 489                 return false;
 490         if (pos >= (Length() - 1))
 491                 return false;
 492         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 493 }
 494
 495 int Document::LenChar(int pos) {
 496         if (pos < 0) {
 497                 return 1;
 498         } else if (IsCrLf(pos)) {
 499                 return 2;
 500         } else if (SC_CP_UTF8 == dbcsCodePage) {
 501                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 502                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 503                 int lengthDoc = Length();
 504                 if ((pos + widthCharBytes) > lengthDoc)
 505                         return lengthDoc - pos;
 506                 else
 507                         return widthCharBytes;
 508         } else if (dbcsCodePage) {
 509                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 510         } else {
 511                 return 1;
 512         }
 513 }
 514
 515 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 516         int trail = pos;
 517         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 518                 trail--;
 519         start = (trail > 0) ? trail-1 : trail;
 520
 521         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 522         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 523         if (widthCharBytes == 1) {
 524                 return false;
 525         } else {
 526                 int trailBytes = widthCharBytes - 1;
 527                 int len = pos - start;
 528                 if (len > trailBytes)
 529                         // pos too far from lead
 530                         return false;
 531                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 532                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 533                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 534                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 535                 if (utf8status & UTF8MaskInvalid)
 536                         return false;
 537                 end = start + widthCharBytes;
 538                 return true;
 539         }
 540 }
 541
 542 // Normalise a position so that it is not halfway through a two byte character.
 543 // This can occur in two situations -
 544 // When lines are terminated with \r\n pairs which should be treated as one character.
 545 // When displaying DBCS text such as Japanese.
 546 // If moving, move the position in the indicated direction.
 547 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
 548         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 549         // If out of range, just return minimum/maximum value.
 550         if (pos <= 0)
 551                 return 0;
 552         if (pos >= Length())
 553                 return Length();
 554
 555         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 556         if (checkLineEnd && IsCrLf(pos - 1)) {
 557                 if (moveDir > 0)
 558                         return pos + 1;
 559                 else
 560                         return pos - 1;
 561         }
 562
 563         if (dbcsCodePage) {
 564                 if (SC_CP_UTF8 == dbcsCodePage) {
 565                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 566                         // If ch is not a trail byte then pos is valid intercharacter position
 567                         if (UTF8IsTrailByte(ch)) {
 568                                 int startUTF = pos;
 569                                 int endUTF = pos;
 570                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 571                                         // ch is a trail byte within a UTF-8 character
 572                                         if (moveDir > 0)
 573                                                 pos = endUTF;
 574                                         else
 575                                                 pos = startUTF;
 576                                 }
 577                                 // Else invalid UTF-8 so return position of isolated trail byte
 578                         }
 579                 } else {
 580                         // Anchor DBCS calculations at start of line because start of line can
 581                         // not be a DBCS trail byte.
 582                         int posStartLine = LineStart(LineFromPosition(pos));
 583                         if (pos == posStartLine)
 584                                 return pos;
 585
 586                         // Step back until a non-lead-byte is found.
 587                         int posCheck = pos;
 588                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 589                                 posCheck--;
 590
 591                         // Check from known start of character.
 592                         while (posCheck < pos) {
 593                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 594                                 if (posCheck + mbsize == pos) {
 595                                         return pos;
 596                                 } else if (posCheck + mbsize > pos) {
 597                                         if (moveDir > 0) {
 598                                                 return posCheck + mbsize;
 599                                         } else {
 600                                                 return posCheck;
 601                                         }
 602                                 }
 603                                 posCheck += mbsize;
 604                         }
 605                 }
 606         }
 607
 608         return pos;
 609 }
 610
 611 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 612 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 613 // A \r\n pair is treated as two characters.
 614 int Document::NextPosition(int pos, int moveDir) const {
 615         // If out of range, just return minimum/maximum value.
 616         int increment = (moveDir > 0) ? 1 : -1;
 617         if (pos + increment <= 0)
 618                 return 0;
 619         if (pos + increment >= Length())
 620                 return Length();
 621
 622         if (dbcsCodePage) {
 623                 if (SC_CP_UTF8 == dbcsCodePage) {
 624                         if (increment == 1) {
 625                                 // Simple forward movement case so can avoid some checks
 626                                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 627                                 if (UTF8IsAscii(leadByte)) {
 628                                         // Single byte character or invalid
 629                                         pos++;
 630                                 } else {
 631                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 632                                         char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 633                                         for (int b=1; b<widthCharBytes; b++)
 634                                                 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
 635                                         int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 636                                         if (utf8status & UTF8MaskInvalid)
 637                                                 pos++;
 638                                         else
 639                                                 pos += utf8status & UTF8MaskWidth;
 640                                 }
 641                         } else {
 642                                 // Examine byte before position
 643                                 pos--;
 644                                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 645                                 // If ch is not a trail byte then pos is valid intercharacter position
 646                                 if (UTF8IsTrailByte(ch)) {
 647                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 648                                         int startUTF = pos;
 649                                         int endUTF = pos;
 650                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 651                                                 pos = startUTF;
 652                                         }
 653                                         // Else invalid UTF-8 so return position of isolated trail byte
 654                                 }
 655                         }
 656                 } else {
 657                         if (moveDir > 0) {
 658                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 659                                 pos += mbsize;
 660                                 if (pos > Length())
 661                                         pos = Length();
 662                         } else {
 663                                 // Anchor DBCS calculations at start of line because start of line can
 664                                 // not be a DBCS trail byte.
 665                                 int posStartLine = LineStart(LineFromPosition(pos));
 666                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 667                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 668                                 if ((pos - 1) <= posStartLine) {
 669                                         return pos - 1;
 670                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 671                                         // Must actually be trail byte
 672                                         return pos - 2;
 673                                 } else {
 674                                         // Otherwise, step back until a non-lead-byte is found.
 675                                         int posTemp = pos - 1;
 676                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 677                                                 ;
 678                                         // Now posTemp+1 must point to the beginning of a character,
 679                                         // so figure out whether we went back an even or an odd
 680                                         // number of bytes and go back 1 or 2 bytes, respectively.
 681                                         return (pos - 1 - ((pos - posTemp) & 1));
 682                                 }
 683                         }
 684                 }
 685         } else {
 686                 pos += increment;
 687         }
 688
 689         return pos;
 690 }
 691
 692 bool Document::NextCharacter(int &pos, int moveDir) const {
 693         // Returns true if pos changed
 694         int posNext = NextPosition(pos, moveDir);
 695         if (posNext == pos) {
 696                 return false;
 697         } else {
 698                 pos = posNext;
 699                 return true;
 700         }
 701 }
 702
 703 static inline int UnicodeFromBytes(const unsigned char *us) {
 704         if (us[0] < 0xC2) {
 705                 return us[0];
 706         } else if (us[0] < 0xE0) {
 707                 return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
 708         } else if (us[0] < 0xF0) {
 709                 return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
 710         } else if (us[0] < 0xF5) {
 711                 return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
 712         }
 713         return us[0];
 714 }
 715
 716 // Return -1  on out-of-bounds
 717 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
 718         int pos = positionStart;
 719         if (dbcsCodePage) {
 720                 const int increment = (characterOffset > 0) ? 1 : -1;
 721                 while (characterOffset != 0) {
 722                         const int posNext = NextPosition(pos, increment);
 723                         if (posNext == pos)
 724                                 return INVALID_POSITION;
 725                         pos = posNext;
 726                         characterOffset -= increment;
 727                 }
 728         } else {
 729                 pos = positionStart + characterOffset;
 730                 if ((pos < 0) || (pos > Length()))
 731                         return INVALID_POSITION;
 732         }
 733         return pos;
 734 }
 735
 736 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
 737         int character;
 738         int bytesInCharacter = 1;
 739         if (dbcsCodePage) {
 740                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 741                 if (SC_CP_UTF8 == dbcsCodePage) {
 742                         if (UTF8IsAscii(leadByte)) {
 743                                 // Single byte character or invalid
 744                                 character =  leadByte;
 745                         } else {
 746                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 747                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 748                                 for (int b=1; b<widthCharBytes; b++)
 749                                         charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 750                                 int utf8status = UTF8Classify(charBytes, widthCharBytes);
 751                                 if (utf8status & UTF8MaskInvalid) {
 752                                         // Report as singleton surrogate values which are invalid Unicode
 753                                         character =  0xDC80 + leadByte;
 754                                 } else {
 755                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 756                                         character = UnicodeFromBytes(charBytes);
 757                                 }
 758                         }
 759                 } else {
 760                         if (IsDBCSLeadByte(leadByte)) {
 761                                 bytesInCharacter = 2;
 762                                 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 763                         } else {
 764                                 character = leadByte;
 765                         }
 766                 }
 767         } else {
 768                 character = cb.CharAt(position);
 769         }
 770         if (pWidth) {
 771                 *pWidth = bytesInCharacter;
 772         }
 773         return character;
 774 }
 775
 776 int SCI_METHOD Document::CodePage() const {
 777         return dbcsCodePage;
 778 }
 779
 780 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 781         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 782         unsigned char uch = static_cast<unsigned char>(ch);
 783         switch (dbcsCodePage) {
 784                 case 932:
 785                         // Shift_jis
 786                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 787                                 ((uch >= 0xE0) && (uch <= 0xFC));
 788                                 // Lead bytes F0 to FC may be a Microsoft addition.
 789                 case 936:
 790                         // GBK
 791                         return (uch >= 0x81) && (uch <= 0xFE);
 792                 case 949:
 793                         // Korean Wansung KS C-5601-1987
 794                         return (uch >= 0x81) && (uch <= 0xFE);
 795                 case 950:
 796                         // Big5
 797                         return (uch >= 0x81) && (uch <= 0xFE);
 798                 case 1361:
 799                         // Korean Johab KS C-5601-1992
 800                         return
 801                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 802                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 803                                 ((uch >= 0xE0) && (uch <= 0xF9));
 804         }
 805         return false;
 806 }
 807
 808 static inline bool IsSpaceOrTab(int ch) {
 809         return ch == ' ' || ch == '\t';
 810 }
 811
 812 // Need to break text into segments near lengthSegment but taking into
 813 // account the encoding to not break inside a UTF-8 or DBCS character
 814 // and also trying to avoid breaking inside a pair of combining characters.
 815 // The segment length must always be long enough (more than 4 bytes)
 816 // so that there will be at least one whole character to make a segment.
 817 // For UTF-8, text must consist only of valid whole characters.
 818 // In preference order from best to worst:
 819 //   1) Break after space
 820 //   2) Break before punctuation
 821 //   3) Break after whole character
 822
 823 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
 824         if (length <= lengthSegment)
 825                 return length;
 826         int lastSpaceBreak = -1;
 827         int lastPunctuationBreak = -1;
 828         int lastEncodingAllowedBreak = 0;
 829         for (int j=0; j < lengthSegment;) {
 830                 unsigned char ch = static_cast<unsigned char>(text[j]);
 831                 if (j > 0) {
 832                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 833                                 lastSpaceBreak = j;
 834                         }
 835                         if (ch < 'A') {
 836                                 lastPunctuationBreak = j;
 837                         }
 838                 }
 839                 lastEncodingAllowedBreak = j;
 840
 841                 if (dbcsCodePage == SC_CP_UTF8) {
 842                         j += UTF8BytesOfLead[ch];
 843                 } else if (dbcsCodePage) {
 844                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 845                 } else {
 846                         j++;
 847                 }
 848         }
 849         if (lastSpaceBreak >= 0) {
 850                 return lastSpaceBreak;
 851         } else if (lastPunctuationBreak >= 0) {
 852                 return lastPunctuationBreak;
 853         }
 854         return lastEncodingAllowedBreak;
 855 }
 856
 857 EncodingFamily Document::CodePageFamily() const {
 858         if (SC_CP_UTF8 == dbcsCodePage)
 859                 return efUnicode;
 860         else if (dbcsCodePage)
 861                 return efDBCS;
 862         else
 863                 return efEightBit;
 864 }
 865
 866 void Document::ModifiedAt(int pos) {
 867         if (endStyled > pos)
 868                 endStyled = pos;
 869 }
 870
 871 void Document::CheckReadOnly() {
 872         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 873                 enteredReadOnlyCount++;
 874                 NotifyModifyAttempt();
 875                 enteredReadOnlyCount--;
 876         }
 877 }
 878
 879 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 880 // SetStyleAt does not change the persistent state of a document
 881
 882 bool Document::DeleteChars(int pos, int len) {
 883         if (len <= 0)
 884                 return false;
 885         if ((pos + len) > Length())
 886                 return false;
 887         CheckReadOnly();
 888         if (enteredModification != 0) {
 889                 return false;
 890         } else {
 891                 enteredModification++;
 892                 if (!cb.IsReadOnly()) {
 893                         NotifyModified(
 894                             DocModification(
 895                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 896                                 pos, len,
 897                                 0, 0));
 898                         int prevLinesTotal = LinesTotal();
 899                         bool startSavePoint = cb.IsSavePoint();
 900                         bool startSequence = false;
 901                         const char *text = cb.DeleteChars(pos, len, startSequence);
 902                         if (startSavePoint && cb.IsCollectingUndo())
 903                                 NotifySavePoint(!startSavePoint);
 904                         if ((pos < Length()) || (pos == 0))
 905                                 ModifiedAt(pos);
 906                         else
 907                                 ModifiedAt(pos-1);
 908                         NotifyModified(
 909                             DocModification(
 910                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 911                                 pos, len,
 912                                 LinesTotal() - prevLinesTotal, text));
 913                 }
 914                 enteredModification--;
 915         }
 916         return !cb.IsReadOnly();
 917 }
 918
 919 /**
 920  * Insert a string with a length.
 921  */
 922 bool Document::InsertString(int position, const char *s, int insertLength) {
 923         if (insertLength <= 0) {
 924                 return false;
 925         }
 926         CheckReadOnly();
 927         if (enteredModification != 0) {
 928                 return false;
 929         } else {
 930                 enteredModification++;
 931                 if (!cb.IsReadOnly()) {
 932                         NotifyModified(
 933                             DocModification(
 934                                 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
 935                                 position, insertLength,
 936                                 0, s));
 937                         int prevLinesTotal = LinesTotal();
 938                         bool startSavePoint = cb.IsSavePoint();
 939                         bool startSequence = false;
 940                         const char *text = cb.InsertString(position, s, insertLength, startSequence);
 941                         if (startSavePoint && cb.IsCollectingUndo())
 942                                 NotifySavePoint(!startSavePoint);
 943                         ModifiedAt(position);
 944                         NotifyModified(
 945                             DocModification(
 946                                 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 947                                 position, insertLength,
 948                                 LinesTotal() - prevLinesTotal, text));
 949                 }
 950                 enteredModification--;
 951         }
 952         return !cb.IsReadOnly();
 953 }
 954
 955 int SCI_METHOD Document::AddData(char *data, int length) {
 956         try {
 957                 int position = Length();
 958                 InsertString(position, data, length);
 959         } catch (std::bad_alloc &) {
 960                 return SC_STATUS_BADALLOC;
 961         } catch (...) {
 962                 return SC_STATUS_FAILURE;
 963         }
 964         return 0;
 965 }
 966
 967 void * SCI_METHOD Document::ConvertToDocument() {
 968         return this;
 969 }
 970
/**
 * Undo the most recent group of actions recorded by the cell buffer.
 * Each step is reported to watchers before and after it is performed.
 * Nothing is undone when a modification is already in progress, when undo
 * is not being collected, or when the buffer is read-only.
 * @return the position after the last text step undone, or -1 when no
 * text step was undone.
 */
int Document::Undo() {
        int newPos = -1;
        CheckReadOnly();
        if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        bool multiLine = false;
                        int steps = cb.StartUndo();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        // Track a run of adjacent removals being undone (text being
                        // reinserted) so that newPos can be placed after the whole run.
                        int coalescedRemovePos = -1;
                        int coalescedRemoveLen = 0;
                        int prevRemoveActionPos = -1;
                        int prevRemoveActionLen = 0;
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetUndoStep();
                                // Notify before the step: undoing a removal inserts text,
                                // undoing any other text action deletes text.
                                if (action.at == removeAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
                                } else if (action.at == containerAction) {
                                        // Container actions carry a token in the position field
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                        // A container action that may not coalesce ends any
                                        // run of removals being tracked.
                                        if (!action.mayCoalesce) {
                                                coalescedRemovePos = -1;
                                                coalescedRemoveLen = 0;
                                                prevRemoveActionPos = -1;
                                                prevRemoveActionLen = 0;
                                        }
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
                                }
                                cb.PerformUndoStep();
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                        newPos = action.position;
                                }

                                int modFlags = SC_PERFORMED_UNDO;
                                // With undo, an insertion action becomes a deletion notification
                                // and a removal action becomes an insertion notification.
                                if (action.at == removeAction) {
                                        newPos += action.lenData;
                                        modFlags |= SC_MOD_INSERTTEXT;
                                        // This removal continues the tracked run when it starts
                                        // where the previous one started or where it ended.
                                        if ((coalescedRemoveLen > 0) &&
                                                (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
                                                coalescedRemoveLen += action.lenData;
                                                newPos = coalescedRemovePos + coalescedRemoveLen;
                                        } else {
                                                // Start tracking a new run at this removal
                                                coalescedRemovePos = action.position;
                                                coalescedRemoveLen = action.lenData;
                                        }
                                        prevRemoveActionPos = action.position;
                                        prevRemoveActionLen = action.lenData;
                                } else if (action.at == insertAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                        // An undone insertion ends any run of removals
                                        coalescedRemovePos = -1;
                                        coalescedRemoveLen = 0;
                                        prevRemoveActionPos = -1;
                                        prevRemoveActionLen = 0;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step is flagged, along with whether any step
                                // in the group changed the number of lines.
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                                                                           linesAdded, action.data));
                        }

                        // Report a change of save point state caused by the undo
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);
                }
                enteredModification--;
        }
        return newPos;
}
1055
1056 int Document::Redo() {
1057         int newPos = -1;
1058         CheckReadOnly();
1059         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1060                 enteredModification++;
1061                 if (!cb.IsReadOnly()) {
1062                         bool startSavePoint = cb.IsSavePoint();
1063                         bool multiLine = false;
1064                         int steps = cb.StartRedo();
1065                         for (int step = 0; step < steps; step++) {
1066                                 const int prevLinesTotal = LinesTotal();
1067                                 const Action &action = cb.GetRedoStep();
1068                                 if (action.at == insertAction) {
1069                                         NotifyModified(DocModification(
1070                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1071                                 } else if (action.at == containerAction) {
1072                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1073                                         dm.token = action.position;
1074                                         NotifyModified(dm);
1075                                 } else {
1076                                         NotifyModified(DocModification(
1077                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1078                                 }
1079                                 cb.PerformRedoStep();
1080                                 if (action.at != containerAction) {
1081                                         ModifiedAt(action.position);
1082                                         newPos = action.position;
1083                                 }
1084
1085                                 int modFlags = SC_PERFORMED_REDO;
1086                                 if (action.at == insertAction) {
1087                                         newPos += action.lenData;
1088                                         modFlags |= SC_MOD_INSERTTEXT;
1089                                 } else if (action.at == removeAction) {
1090                                         modFlags |= SC_MOD_DELETETEXT;
1091                                 }
1092                                 if (steps > 1)
1093                                         modFlags |= SC_MULTISTEPUNDOREDO;
1094                                 const int linesAdded = LinesTotal() - prevLinesTotal;
1095                                 if (linesAdded != 0)
1096                                         multiLine = true;
1097                                 if (step == steps - 1) {
1098                                         modFlags |= SC_LASTSTEPINUNDOREDO;
1099                                         if (multiLine)
1100                                                 modFlags |= SC_MULTILINEUNDOREDO;
1101                                 }
1102                                 NotifyModified(
1103                                         DocModification(modFlags, action.position, action.lenData,
1104                                                                         linesAdded, action.data));
1105                         }
1106
1107                         bool endSavePoint = cb.IsSavePoint();
1108                         if (startSavePoint != endSavePoint)
1109                                 NotifySavePoint(endSavePoint);
1110                 }
1111                 enteredModification--;
1112         }
1113         return newPos;
1114 }
1115
1116 /**
1117  * Insert a single character.
1118  */
1119 bool Document::InsertChar(int pos, char ch) {
1120         char chs[1];
1121         chs[0] = ch;
1122         return InsertString(pos, chs, 1);
1123 }
1124
1125 /**
1126  * Insert a null terminated string.
1127  */
1128 bool Document::InsertCString(int position, const char *s) {
1129         return InsertString(position, s, static_cast<int>(s ? strlen(s) : 0));
1130 }
1131
1132 void Document::DelChar(int pos) {
1133         DeleteChars(pos, LenChar(pos));
1134 }
1135
1136 void Document::DelCharBack(int pos) {
1137         if (pos <= 0) {
1138                 return;
1139         } else if (IsCrLf(pos - 2)) {
1140                 DeleteChars(pos - 2, 2);
1141         } else if (dbcsCodePage) {
1142                 int startChar = NextPosition(pos, -1);
1143                 DeleteChars(startChar, pos - startChar);
1144         } else {
1145                 DeleteChars(pos - 1, 1);
1146         }
1147 }
1148
// Column of the first tab stop strictly after pos, with tab stops at
// every multiple of tabSize.
static int NextTab(int pos, int tabSize) {
        const int intoTab = pos % tabSize;
        return pos - intoTab + tabSize;
}
1152
// Build the white space needed to reach an indentation of indent columns.
// When insertSpaces is false, as many tabs of width tabSize as fit are used
// and the remainder is made up with spaces; otherwise only spaces are used.
// A non-positive indent yields an empty string. A non-positive tabSize cannot
// make progress with tabs, so the result is then made of spaces only instead
// of looping forever.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
        std::string indentation;
        if (indent <= 0)
                return indentation;
        int tabs = 0;
        if (!insertSpaces && (tabSize > 0))
                tabs = indent / tabSize;
        const int spaces = indent - tabs * tabSize;
        indentation.append(static_cast<std::string::size_type>(tabs), '\t');
        indentation.append(static_cast<std::string::size_type>(spaces), ' ');
        return indentation;
}
1167
1168 int SCI_METHOD Document::GetLineIndentation(int line) {
1169         int indent = 0;
1170         if ((line >= 0) && (line < LinesTotal())) {
1171                 int lineStart = LineStart(line);
1172                 int length = Length();
1173                 for (int i = lineStart; i < length; i++) {
1174                         char ch = cb.CharAt(i);
1175                         if (ch == ' ')
1176                                 indent++;
1177                         else if (ch == '\t')
1178                                 indent = NextTab(indent, tabInChars);
1179                         else
1180                                 return indent;
1181                 }
1182         }
1183         return indent;
1184 }
1185
1186 void Document::SetLineIndentation(int line, int indent) {
1187         int indentOfLine = GetLineIndentation(line);
1188         if (indent < 0)
1189                 indent = 0;
1190         if (indent != indentOfLine) {
1191                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1192                 int thisLineStart = LineStart(line);
1193                 int indentPos = GetLineIndentPosition(line);
1194                 UndoGroup ug(this);
1195                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1196                 InsertCString(thisLineStart, linebuf.c_str());
1197         }
1198 }
1199
1200 int Document::GetLineIndentPosition(int line) const {
1201         if (line < 0)
1202                 return 0;
1203         int pos = LineStart(line);
1204         int length = Length();
1205         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1206                 pos++;
1207         }
1208         return pos;
1209 }
1210
1211 int Document::GetColumn(int pos) {
1212         int column = 0;
1213         int line = LineFromPosition(pos);
1214         if ((line >= 0) && (line < LinesTotal())) {
1215                 for (int i = LineStart(line); i < pos;) {
1216                         char ch = cb.CharAt(i);
1217                         if (ch == '\t') {
1218                                 column = NextTab(column, tabInChars);
1219                                 i++;
1220                         } else if (ch == '\r') {
1221                                 return column;
1222                         } else if (ch == '\n') {
1223                                 return column;
1224                         } else if (i >= Length()) {
1225                                 return column;
1226                         } else {
1227                                 column++;
1228                                 i = NextPosition(i, 1);
1229                         }
1230                 }
1231         }
1232         return column;
1233 }
1234
1235 int Document::CountCharacters(int startPos, int endPos) {
1236         startPos = MovePositionOutsideChar(startPos, 1, false);
1237         endPos = MovePositionOutsideChar(endPos, -1, false);
1238         int count = 0;
1239         int i = startPos;
1240         while (i < endPos) {
1241                 count++;
1242                 if (IsCrLf(i))
1243                         i++;
1244                 i = NextPosition(i, 1);
1245         }
1246         return count;
1247 }
1248
1249 int Document::FindColumn(int line, int column) {
1250         int position = LineStart(line);
1251         if ((line >= 0) && (line < LinesTotal())) {
1252                 int columnCurrent = 0;
1253                 while ((columnCurrent < column) && (position < Length())) {
1254                         char ch = cb.CharAt(position);
1255                         if (ch == '\t') {
1256                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1257                                 if (columnCurrent > column)
1258                                         return position;
1259                                 position++;
1260                         } else if (ch == '\r') {
1261                                 return position;
1262                         } else if (ch == '\n') {
1263                                 return position;
1264                         } else {
1265                                 columnCurrent++;
1266                                 position = NextPosition(position, 1);
1267                         }
1268                 }
1269         }
1270         return position;
1271 }
1272
1273 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1274         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1275         for (int line = lineBottom; line >= lineTop; line--) {
1276                 int indentOfLine = GetLineIndentation(line);
1277                 if (forwards) {
1278                         if (LineStart(line) < LineEnd(line)) {
1279                                 SetLineIndentation(line, indentOfLine + IndentSize());
1280                         }
1281                 } else {
1282                         SetLineIndentation(line, indentOfLine - IndentSize());
1283                 }
1284         }
1285 }
1286
1287 // Convert line endings for a piece of text to a particular mode.
1288 // Stop at len or when a NUL is found.
1289 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1290         std::string dest;
1291         for (size_t i = 0; (i < len) && (s[i]); i++) {
1292                 if (s[i] == '\n' || s[i] == '\r') {
1293                         if (eolModeWanted == SC_EOL_CR) {
1294                                 dest.push_back('\r');
1295                         } else if (eolModeWanted == SC_EOL_LF) {
1296                                 dest.push_back('\n');
1297                         } else { // eolModeWanted == SC_EOL_CRLF
1298                                 dest.push_back('\r');
1299                                 dest.push_back('\n');
1300                         }
1301                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1302                                 i++;
1303                         }
1304                 } else {
1305                         dest.push_back(s[i]);
1306                 }
1307         }
1308         return dest;
1309 }
1310
1311 void Document::ConvertLineEnds(int eolModeSet) {
1312         UndoGroup ug(this);
1313
1314         for (int pos = 0; pos < Length(); pos++) {
1315                 if (cb.CharAt(pos) == '\r') {
1316                         if (cb.CharAt(pos + 1) == '\n') {
1317                                 // CRLF
1318                                 if (eolModeSet == SC_EOL_CR) {
1319                                         DeleteChars(pos + 1, 1); // Delete the LF
1320                                 } else if (eolModeSet == SC_EOL_LF) {
1321                                         DeleteChars(pos, 1); // Delete the CR
1322                                 } else {
1323                                         pos++;
1324                                 }
1325                         } else {
1326                                 // CR
1327                                 if (eolModeSet == SC_EOL_CRLF) {
1328                                         InsertString(pos + 1, "\n", 1); // Insert LF
1329                                         pos++;
1330                                 } else if (eolModeSet == SC_EOL_LF) {
1331                                         InsertString(pos, "\n", 1); // Insert LF
1332                                         DeleteChars(pos + 1, 1); // Delete CR
1333                                 }
1334                         }
1335                 } else if (cb.CharAt(pos) == '\n') {
1336                         // LF
1337                         if (eolModeSet == SC_EOL_CRLF) {
1338                                 InsertString(pos, "\r", 1); // Insert CR
1339                                 pos++;
1340                         } else if (eolModeSet == SC_EOL_CR) {
1341                                 InsertString(pos, "\r", 1); // Insert CR
1342                                 DeleteChars(pos + 1, 1); // Delete LF
1343                         }
1344                 }
1345         }
1346
1347 }
1348
1349 bool Document::IsWhiteLine(int line) const {
1350         int currentChar = LineStart(line);
1351         int endLine = LineEnd(line);
1352         while (currentChar < endLine) {
1353                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1354                         return false;
1355                 }
1356                 ++currentChar;
1357         }
1358         return true;
1359 }
1360
1361 int Document::ParaUp(int pos) const {
1362         int line = LineFromPosition(pos);
1363         line--;
1364         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1365                 line--;
1366         }
1367         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1368                 line--;
1369         }
1370         line++;
1371         return LineStart(line);
1372 }
1373
1374 int Document::ParaDown(int pos) const {
1375         int line = LineFromPosition(pos);
1376         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1377                 line++;
1378         }
1379         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1380                 line++;
1381         }
1382         if (line < LinesTotal())
1383                 return LineStart(line);
1384         else // end of a document
1385                 return LineEnd(line-1);
1386 }
1387
1388 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1389         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1390                 return CharClassify::ccWord;
1391         return charClass.GetClass(ch);
1392 }
1393
1394 /**
1395  * Used by commmands that want to select whole words.
1396  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1397  */
1398 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1399         CharClassify::cc ccStart = CharClassify::ccWord;
1400         if (delta < 0) {
1401                 if (!onlyWordCharacters)
1402                         ccStart = WordCharClass(cb.CharAt(pos-1));
1403                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1404                         pos--;
1405         } else {
1406                 if (!onlyWordCharacters && pos < Length())
1407                         ccStart = WordCharClass(cb.CharAt(pos));
1408                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1409                         pos++;
1410         }
1411         return MovePositionOutsideChar(pos, delta, true);
1412 }
1413
1414 /**
1415  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1416  * (delta < 0).
1417  * This is looking for a transition between character classes although there is also some
1418  * additional movement to transit white space.
1419  * Used by cursor movement by word commands.
1420  */
1421 int Document::NextWordStart(int pos, int delta) {
1422         if (delta < 0) {
1423                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1424                         pos--;
1425                 if (pos > 0) {
1426                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1427                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1428                                 pos--;
1429                         }
1430                 }
1431         } else {
1432                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1433                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1434                         pos++;
1435                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1436                         pos++;
1437         }
1438         return pos;
1439 }
1440
1441 /**
1442  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1443  * (delta < 0).
1444  * This is looking for a transition between character classes although there is also some
1445  * additional movement to transit white space.
1446  * Used by cursor movement by word commands.
1447  */
1448 int Document::NextWordEnd(int pos, int delta) {
1449         if (delta < 0) {
1450                 if (pos > 0) {
1451                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1452                         if (ccStart != CharClassify::ccSpace) {
1453                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1454                                         pos--;
1455                                 }
1456                         }
1457                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1458                                 pos--;
1459                         }
1460                 }
1461         } else {
1462                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1463                         pos++;
1464                 }
1465                 if (pos < Length()) {
1466                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1467                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1468                                 pos++;
1469                         }
1470                 }
1471         }
1472         return pos;
1473 }
1474
1475 /**
1476  * Check that the character at the given position is a word or punctuation character and that
1477  * the previous character is of a different character class.
1478  */
1479 bool Document::IsWordStartAt(int pos) const {
1480         if (pos > 0) {
1481                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1482                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1483                         (ccPos != WordCharClass(CharAt(pos - 1)));
1484         }
1485         return true;
1486 }
1487
1488 /**
1489  * Check that the character at the given position is a word or punctuation character and that
1490  * the next character is of a different character class.
1491  */
1492 bool Document::IsWordEndAt(int pos) const {
1493         if (pos < Length()) {
1494                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1495                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1496                         (ccPrev != WordCharClass(CharAt(pos)));
1497         }
1498         return true;
1499 }
1500
1501 /**
1502  * Check that the given range is has transitions between character classes at both
1503  * ends and where the characters on the inside are word or punctuation characters.
1504  */
1505 bool Document::IsWordAt(int start, int end) const {
1506         return IsWordStartAt(start) && IsWordEndAt(end);
1507 }
1508
1509 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1510         return (!word && !wordStart) ||
1511                         (word && IsWordAt(pos, pos + length)) ||
1512                         (wordStart && IsWordStartAt(pos));
1513 }
1514
1515 bool Document::HasCaseFolder(void) const {
1516         return pcf != 0;
1517 }
1518
1519 void Document::SetCaseFolder(CaseFolder *pcf_) {
1520         delete pcf;
1521         pcf = pcf_;
1522 }
1523
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos        position the search starts from
 * @param maxPos        position the search runs towards
 * @param search        bytes to look for (or the pattern when regExp is set)
 * @param caseSensitive compare bytes exactly instead of comparing case-folded text
 * @param word          only accept matches for which IsWordAt holds
 * @param wordStart     only accept matches for which IsWordStartAt holds
 * @param regExp        delegate the whole search to the regex engine
 * @param flags         passed through to the regex engine only
 * @param length        in: number of bytes in search; out: in the UTF-8 and DBCS
 *                      case-insensitive paths it is set to the number of document
 *                      bytes matched
 * @return position of the match, minPos when *length <= 0, or -1 when nothing
 *         is found by the non-regex paths.
 *
 * The case-insensitive paths dereference pcf without checking it, so a case
 * folder must have been installed before they are used.
 */
long Document::FindText(int minPos, int maxPos, const char *search,
                        bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
                        int *length) {
        // An empty search string trivially "matches" at the starting position.
        if (*length <= 0)
                return minPos;
        if (regExp) {
                // The regex engine is created on first use and kept for later calls.
                if (!regex)
                        regex = CreateRegexSearch(&charClass);
                return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
        } else {

                // Direction is implied by the order of the two range ends.
                const bool forward = minPos <= maxPos;
                const int increment = forward ? 1 : -1;

                // Range endpoints should not be inside DBCS characters, but just in case, move them.
                const int startPos = MovePositionOutsideChar(minPos, increment, false);
                const int endPos = MovePositionOutsideChar(maxPos, increment, false);

                // Compute actual search ranges needed
                const int lengthFind = *length;

                //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
                // Upper bound of the range regardless of direction; a match may not extend past it.
                const int limitPos = Platform::Maximum(startPos, endPos);
                int pos = startPos;
                if (!forward) {
                        // Back all of a character
                        pos = NextPosition(pos, increment);
                }
                if (caseSensitive) {
                        // Exact byte comparison. When searching forwards there is no point
                        // starting a comparison closer than lengthFind bytes to the end.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        const char charStartSearch =  search[0];
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                // Cheap first-byte test before comparing the rest.
                                if (CharAt(pos) == charStartSearch) {
                                        bool found = (pos + lengthFind) <= limitPos;
                                        for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
                                                found = CharAt(pos + indexSearch) == search[indexSearch];
                                        }
                                        if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                                return pos;
                                        }
                                }
                                // Advance by a whole character; stop if no further movement is possible.
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else if (SC_CP_UTF8 == dbcsCodePage) {
                        // Case-insensitive UTF-8: fold the search text once, then fold the
                        // document one character at a time and compare the folded bytes.
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        char bytes[UTF8MaxBytes + 1];
                        char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                // Width of the character at pos, used to step forwards afterwards.
                                int widthFirstCharacter = 0;
                                int posIndexDocument = pos;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                for (;;) {
                                        // Gather the bytes of the next document character.
                                        const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
                                        bytes[0] = leadByte;
                                        int widthChar = 1;
                                        if (!UTF8IsAscii(leadByte)) {
                                                const int widthCharBytes = UTF8BytesOfLead[leadByte];
                                                for (int b=1; b<widthCharBytes; b++) {
                                                        bytes[b] = cb.CharAt(posIndexDocument+b);
                                                }
                                                // The width is taken from UTF8Classify's result rather than
                                                // from the lead byte alone.
                                                widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
                                        }
                                        if (!widthFirstCharacter)
                                                widthFirstCharacter = widthChar;
                                        // Do not read a character that would cross the end of the range.
                                        if ((posIndexDocument + widthChar) > limitPos)
                                                break;
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        if (!characterMatches)
                                                break;
                                        posIndexDocument += widthChar;
                                        indexSearch += lenFlat;
                                        if (indexSearch >= lenSearch)
                                                break;
                                }
                                // A match requires the folded search text to have been consumed exactly.
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
                                                // Report the matched document length, which can differ from the
                                                // length of the search text.
                                                *length = posIndexDocument - pos;
                                                return pos;
                                        }
                                }
                                if (forward) {
                                        pos += widthFirstCharacter;
                                } else {
                                        if (!NextCharacter(pos, increment))
                                                break;
                                }
                        }
                } else if (dbcsCodePage) {
                        // Case-insensitive DBCS: same approach as UTF-8 but characters are
                        // one or two bytes wide as decided by IsDBCSLeadByte.
                        const size_t maxBytesCharacter = 2;
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int indexDocument = 0;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                while (characterMatches &&
                                        ((pos + indexDocument) < limitPos) &&
                                        (indexSearch < lenSearch)) {
                                        char bytes[maxBytesCharacter + 1];
                                        bytes[0] = cb.CharAt(pos + indexDocument);
                                        const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
                                        if (widthChar == 2)
                                                bytes[1] = cb.CharAt(pos + indexDocument + 1);
                                        // Do not read a character that would cross the end of the range.
                                        if ((pos + indexDocument + widthChar) > limitPos)
                                                break;
                                        char folded[maxBytesCharacter * maxFoldingExpansion + 1];
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        indexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                // Report the matched document length.
                                                *length = indexDocument;
                                                return pos;
                                        }
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else {
                        // Case-insensitive single-byte text: fold each document byte on its
                        // own and compare it with the pre-folded search text.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        std::vector<char> searchThing(lengthFind + 1);
                        pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                bool found = (pos + lengthFind) <= limitPos;
                                for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
                                        char ch = CharAt(pos + indexSearch);
                                        char folded[2];
                                        pcf->Fold(folded, sizeof(folded), &ch, 1);
                                        found = folded[0] == searchThing[indexSearch];
                                }
                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                        return pos;
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                }
        }
        //Platform::DebugPrintf("Not found\n");
        return -1;
}
1684
1685 const char *Document::SubstituteByPosition(const char *text, int *length) {
1686         if (regex)
1687                 return regex->SubstituteByPosition(this, text, length);
1688         else
1689                 return 0;
1690 }
1691
1692 int Document::LinesTotal() const {
1693         return cb.Lines();
1694 }
1695
1696 void Document::SetDefaultCharClasses(bool includeWordClass) {
1697     charClass.SetDefaultCharClasses(includeWordClass);
1698 }
1699
1700 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1701     charClass.SetCharClasses(chars, newCharClass);
1702 }
1703
1704 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1705     return charClass.GetCharsOfClass(characterClass, buffer);
1706 }
1707
1708 void Document::SetStylingBits(int bits) {
1709         stylingBits = bits;
1710         stylingBitsMask = (1 << stylingBits) - 1;
1711 }
1712
1713 void SCI_METHOD Document::StartStyling(int position, char mask) {
1714         stylingMask = mask;
1715         endStyled = position;
1716 }
1717
1718 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1719         if (enteredStyling != 0) {
1720                 return false;
1721         } else {
1722                 enteredStyling++;
1723                 style &= stylingMask;
1724                 int prevEndStyled = endStyled;
1725                 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1726                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1727                                            prevEndStyled, length);
1728                         NotifyModified(mh);
1729                 }
1730                 endStyled += length;
1731                 enteredStyling--;
1732                 return true;
1733         }
1734 }
1735
1736 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1737         if (enteredStyling != 0) {
1738                 return false;
1739         } else {
1740                 enteredStyling++;
1741                 bool didChange = false;
1742                 int startMod = 0;
1743                 int endMod = 0;
1744                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1745                         PLATFORM_ASSERT(endStyled < Length());
1746                         if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1747                                 if (!didChange) {
1748                                         startMod = endStyled;
1749                                 }
1750                                 didChange = true;
1751                                 endMod = endStyled;
1752                         }
1753                 }
1754                 if (didChange) {
1755                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1756                                            startMod, endMod - startMod + 1);
1757                         NotifyModified(mh);
1758                 }
1759                 enteredStyling--;
1760                 return true;
1761         }
1762 }
1763
1764 void Document::EnsureStyledTo(int pos) {
1765         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1766                 IncrementStyleClock();
1767                 if (pli && !pli->UseContainerLexing()) {
1768                         int lineEndStyled = LineFromPosition(GetEndStyled());
1769                         int endStyledTo = LineStart(lineEndStyled);
1770                         pli->Colourise(endStyledTo, pos);
1771                 } else {
1772                         // Ask the watchers to style, and stop as soon as one responds.
1773                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1774                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1775                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1776                         }
1777                 }
1778         }
1779 }
1780
1781 void Document::LexerChanged() {
1782         // Tell the watchers the lexer has changed.
1783         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1784                 it->watcher->NotifyLexerChanged(this, it->userData);
1785         }
1786 }
1787
1788 int SCI_METHOD Document::SetLineState(int line, int state) {
1789         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1790         if (state != statePrevious) {
1791                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1792                 NotifyModified(mh);
1793         }
1794         return statePrevious;
1795 }
1796
1797 int SCI_METHOD Document::GetLineState(int line) const {
1798         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1799 }
1800
1801 int Document::GetMaxLineState() {
1802         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1803 }
1804
1805 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1806         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1807         NotifyModified(mh);
1808 }
1809
1810 StyledText Document::MarginStyledText(int line) const {
1811         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1812         return StyledText(pla->Length(line), pla->Text(line),
1813                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1814 }
1815
1816 void Document::MarginSetText(int line, const char *text) {
1817         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1818         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1819         NotifyModified(mh);
1820 }
1821
1822 void Document::MarginSetStyle(int line, int style) {
1823         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1824         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1825 }
1826
1827 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1828         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1829         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1830 }
1831
1832 void Document::MarginClearAll() {
1833         int maxEditorLine = LinesTotal();
1834         for (int l=0; l<maxEditorLine; l++)
1835                 MarginSetText(l, 0);
1836         // Free remaining data
1837         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1838 }
1839
1840 StyledText Document::AnnotationStyledText(int line) const {
1841         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1842         return StyledText(pla->Length(line), pla->Text(line),
1843                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1844 }
1845
1846 void Document::AnnotationSetText(int line, const char *text) {
1847         if (line >= 0 && line < LinesTotal()) {
1848                 const int linesBefore = AnnotationLines(line);
1849                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1850                 const int linesAfter = AnnotationLines(line);
1851                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1852                 mh.annotationLinesAdded = linesAfter - linesBefore;
1853                 NotifyModified(mh);
1854         }
1855 }
1856
1857 void Document::AnnotationSetStyle(int line, int style) {
1858         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1859         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1860         NotifyModified(mh);
1861 }
1862
1863 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1864         if (line >= 0 && line < LinesTotal()) {
1865                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1866         }
1867 }
1868
1869 int Document::AnnotationLines(int line) const {
1870         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1871 }
1872
1873 void Document::AnnotationClearAll() {
1874         int maxEditorLine = LinesTotal();
1875         for (int l=0; l<maxEditorLine; l++)
1876                 AnnotationSetText(l, 0);
1877         // Free remaining data
1878         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1879 }
1880
1881 void Document::IncrementStyleClock() {
1882         styleClock = (styleClock + 1) % 0x100000;
1883 }
1884
1885 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1886         if (decorations.FillRange(position, value, fillLength)) {
1887                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1888                                                         position, fillLength);
1889                 NotifyModified(mh);
1890         }
1891 }
1892
1893 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1894         WatcherWithUserData wwud(watcher, userData);
1895         std::vector<WatcherWithUserData>::iterator it =
1896                 std::find(watchers.begin(), watchers.end(), wwud);
1897         if (it != watchers.end())
1898                 return false;
1899         watchers.push_back(wwud);
1900         return true;
1901 }
1902
1903 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1904         std::vector<WatcherWithUserData>::iterator it =
1905                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1906         if (it != watchers.end()) {
1907                 watchers.erase(it);
1908                 return true;
1909         }
1910         return false;
1911 }
1912
1913 void Document::NotifyModifyAttempt() {
1914         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1915                 it->watcher->NotifyModifyAttempt(this, it->userData);
1916         }
1917 }
1918
1919 void Document::NotifySavePoint(bool atSavePoint) {
1920         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1921                 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1922         }
1923 }
1924
1925 void Document::NotifyModified(DocModification mh) {
1926         if (mh.modificationType & SC_MOD_INSERTTEXT) {
1927                 decorations.InsertSpace(mh.position, mh.length);
1928         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1929                 decorations.DeleteRange(mh.position, mh.length);
1930         }
1931         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1932                 it->watcher->NotifyModified(this, mh, it->userData);
1933         }
1934 }
1935
1936 bool Document::IsWordPartSeparator(char ch) const {
1937         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1938 }
1939
/**
 * Move left from pos to the start of the previous word part.
 * Word part separators directly before pos are skipped first. The part itself is
 * a run of lower case letters, upper case letters, digits, punctuation, white
 * space or non-ASCII bytes, chosen by the kind of the character reached.
 * Returns pos unchanged when pos <= 0.
 */
int Document::WordPartLeft(int pos) {
        if (pos > 0) {
                --pos;
                char startChar = cb.CharAt(pos);
                // Step back over a run of separators immediately before the start.
                if (IsWordPartSeparator(startChar)) {
                        while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
                                --pos;
                        }
                }
                if (pos > 0) {
                        startChar = cb.CharAt(pos);
                        --pos;
                        // In each branch the loop runs back over characters of the same kind
                        // and the following test steps forward again if the loop stopped on
                        // a character that does not belong to the part.
                        if (IsLowerCase(startChar)) {
                                while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
                                        --pos;
                                // An upper case letter in front of the lower case run is kept
                                // as part of the word part.
                                if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsUpperCase(startChar)) {
                                while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
                                        --pos;
                                if (!IsUpperCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsADigit(startChar)) {
                                while (pos > 0 && IsADigit(cb.CharAt(pos)))
                                        --pos;
                                if (!IsADigit(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsPunctuation(startChar)) {
                                while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
                                        --pos;
                                if (!IsPunctuation(cb.CharAt(pos)))
                                        ++pos;
                        } else if (isspacechar(startChar)) {
                                while (pos > 0 && isspacechar(cb.CharAt(pos)))
                                        --pos;
                                if (!isspacechar(cb.CharAt(pos)))
                                        ++pos;
                        } else if (!IsASCII(startChar)) {
                                while (pos > 0 && !IsASCII(cb.CharAt(pos)))
                                        --pos;
                                if (IsASCII(cb.CharAt(pos)))
                                        ++pos;
                        } else {
                                // None of the recognised kinds: undo the step taken above.
                                ++pos;
                        }
                }
        }
        return pos;
}
1989
1990 int Document::WordPartRight(int pos) {
1991         char startChar = cb.CharAt(pos);
1992         int length = Length();
1993         if (IsWordPartSeparator(startChar)) {
1994                 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1995                         ++pos;
1996                 startChar = cb.CharAt(pos);
1997         }
1998         if (!IsASCII(startChar)) {
1999                 while (pos < length && !IsASCII(cb.CharAt(pos)))
2000                         ++pos;
2001         } else if (IsLowerCase(startChar)) {
2002                 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2003                         ++pos;
2004         } else if (IsUpperCase(startChar)) {
2005                 if (IsLowerCase(cb.CharAt(pos + 1))) {
2006                         ++pos;
2007                         while (pos < length && IsLowerCase(cb.CharAt(pos)))
2008                                 ++pos;
2009                 } else {
2010                         while (pos < length && IsUpperCase(cb.CharAt(pos)))
2011                                 ++pos;
2012                 }
2013                 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2014                         --pos;
2015         } else if (IsADigit(startChar)) {
2016                 while (pos < length && IsADigit(cb.CharAt(pos)))
2017                         ++pos;
2018         } else if (IsPunctuation(startChar)) {
2019                 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2020                         ++pos;
2021         } else if (isspacechar(startChar)) {
2022                 while (pos < length && isspacechar(cb.CharAt(pos)))
2023                         ++pos;
2024         } else {
2025                 ++pos;
2026         }
2027         return pos;
2028 }
2029
/** Report whether @a c is a carriage return or a line feed. */
bool IsLineEndChar(char c) {
        switch (c) {
        case '\r':
        case '\n':
                return true;
        default:
                return false;
        }
}
2033
2034 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2035         int sStart = cb.StyleAt(pos);
2036         if (delta < 0) {
2037                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2038                         pos--;
2039                 pos++;
2040         } else {
2041                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2042                         pos++;
2043         }
2044         return pos;
2045 }
2046
/**
 * Return the partner of a brace character: the closer for an opener and the
 * opener for a closer. Any other character yields '\0'.
 */
static char BraceOpposite(char ch) {
        // Each row holds an opening brace and its closing counterpart
        static const char bracePairs[4][2] = {
                { '(', ')' },
                { '[', ']' },
                { '{', '}' },
                { '<', '>' },
        };
        for (int i = 0; i < 4; i++) {
                if (ch == bracePairs[i][0])
                        return bracePairs[i][1];
                if (ch == bracePairs[i][1])
                        return bracePairs[i][0];
        }
        return '\0';
}
2069
2070 // TODO: should be able to extend styled region to find matching brace
2071 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2072         char chBrace = CharAt(position);
2073         char chSeek = BraceOpposite(chBrace);
2074         if (chSeek == '\0')
2075                 return - 1;
2076         char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
2077         int direction = -1;
2078         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2079                 direction = 1;
2080         int depth = 1;
2081         position = NextPosition(position, direction);
2082         while ((position >= 0) && (position < Length())) {
2083                 char chAtPos = CharAt(position);
2084                 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
2085                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2086                         if (chAtPos == chBrace)
2087                                 depth++;
2088                         if (chAtPos == chSeek)
2089                                 depth--;
2090                         if (depth == 0)
2091                                 return position;
2092                 }
2093                 int positionBeforeMove = position;
2094                 position = NextPosition(position, direction);
2095                 if (position == positionBeforeMove)
2096                         break;
2097         }
2098         return - 1;
2099 }
2100
2101 /**
2102  * Implementation of RegexSearchBase for the default built-in regular expression engine
2103  */
2104 class BuiltinRegex : public RegexSearchBase {
2105 public:
2106         explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2107
2108         virtual ~BuiltinRegex() {
2109         }
2110
2111         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2112                         bool caseSensitive, bool word, bool wordStart, int flags,
2113                         int *length);
2114
2115         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2116
2117 private:
2118         RESearch search;
2119         std::string substituted;
2120 };
2121
2122 // Define a way for the Regular Expression code to access the document
2123 class DocumentIndexer : public CharacterIndexer {
2124         Document *pdoc;
2125         int end;
2126 public:
2127         DocumentIndexer(Document *pdoc_, int end_) :
2128                 pdoc(pdoc_), end(end_) {
2129         }
2130
2131         virtual ~DocumentIndexer() {
2132         }
2133
2134         virtual char CharAt(int index) {
2135                 if (index < 0 || index >= end)
2136                         return 0;
2137                 else
2138                         return pdoc->CharAt(index);
2139         }
2140 };
2141
2142 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2143                         bool caseSensitive, bool, bool, int flags,
2144                         int *length) {
2145         bool posix = (flags & SCFIND_POSIX) != 0;
2146         int increment = (minPos <= maxPos) ? 1 : -1;
2147
2148         int startPos = minPos;
2149         int endPos = maxPos;
2150
2151         // Range endpoints should not be inside DBCS characters, but just in case, move them.
2152         startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2153         endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2154
2155         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2156         if (errmsg) {
2157                 return -1;
2158         }
2159         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2160         // Replace first '.' with '-' in each property file variable reference:
2161         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2162         //     Replace: $(\1-\2)
2163         int lineRangeStart = doc->LineFromPosition(startPos);
2164         int lineRangeEnd = doc->LineFromPosition(endPos);
2165         if ((increment == 1) &&
2166                 (startPos >= doc->LineEnd(lineRangeStart)) &&
2167                 (lineRangeStart < lineRangeEnd)) {
2168                 // the start position is at end of line or between line end characters.
2169                 lineRangeStart++;
2170                 startPos = doc->LineStart(lineRangeStart);
2171         } else if ((increment == -1) &&
2172                    (startPos <= doc->LineStart(lineRangeStart)) &&
2173                    (lineRangeStart > lineRangeEnd)) {
2174                 // the start position is at beginning of line.
2175                 lineRangeStart--;
2176                 startPos = doc->LineEnd(lineRangeStart);
2177         }
2178         int pos = -1;
2179         int lenRet = 0;
2180         char searchEnd = s[*length - 1];
2181         char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2182         int lineRangeBreak = lineRangeEnd + increment;
2183         for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2184                 int startOfLine = doc->LineStart(line);
2185                 int endOfLine = doc->LineEnd(line);
2186                 if (increment == 1) {
2187                         if (line == lineRangeStart) {
2188                                 if ((startPos != startOfLine) && (s[0] == '^'))
2189                                         continue;       // Can't match start of line if start position after start of line
2190                                 startOfLine = startPos;
2191                         }
2192                         if (line == lineRangeEnd) {
2193                                 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2194                                         continue;       // Can't match end of line if end position before end of line
2195                                 endOfLine = endPos;
2196                         }
2197                 } else {
2198                         if (line == lineRangeEnd) {
2199                                 if ((endPos != startOfLine) && (s[0] == '^'))
2200                                         continue;       // Can't match start of line if end position after start of line
2201                                 startOfLine = endPos;
2202                         }
2203                         if (line == lineRangeStart) {
2204                                 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2205                                         continue;       // Can't match end of line if start position before end of line
2206                                 endOfLine = startPos;
2207                         }
2208                 }
2209
2210                 DocumentIndexer di(doc, endOfLine);
2211                 int success = search.Execute(di, startOfLine, endOfLine);
2212                 if (success) {
2213                         pos = search.bopat[0];
2214                         // Ensure only whole characters selected
2215                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2216                         lenRet = search.eopat[0] - search.bopat[0];
2217                         // There can be only one start of a line, so no need to look for last match in line
2218                         if ((increment == -1) && (s[0] != '^')) {
2219                                 // Check for the last match on this line.
2220                                 int repetitions = 1000; // Break out of infinite loop
2221                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2222                                         success = search.Execute(di, pos+1, endOfLine);
2223                                         if (success) {
2224                                                 if (search.eopat[0] <= minPos) {
2225                                                         pos = search.bopat[0];
2226                                                         lenRet = search.eopat[0] - search.bopat[0];
2227                                                 } else {
2228                                                         success = 0;
2229                                                 }
2230                                         }
2231                                 }
2232                         }
2233                         break;
2234                 }
2235         }
2236         *length = lenRet;
2237         return pos;
2238 }
2239
2240 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2241         substituted.clear();
2242         DocumentIndexer di(doc, doc->Length());
2243         search.GrabMatches(di);
2244         for (int j = 0; j < *length; j++) {
2245                 if (text[j] == '\\') {
2246                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2247                                 unsigned int patNum = text[j + 1] - '0';
2248                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2249                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
2250                                         substituted.append(search.pat[patNum].c_str(), len);
2251                                 j++;
2252                         } else {
2253                                 j++;
2254                                 switch (text[j]) {
2255                                 case 'a':
2256                                         substituted.push_back('\a');
2257                                         break;
2258                                 case 'b':
2259                                         substituted.push_back('\b');
2260                                         break;
2261                                 case 'f':
2262                                         substituted.push_back('\f');
2263                                         break;
2264                                 case 'n':
2265                                         substituted.push_back('\n');
2266                                         break;
2267                                 case 'r':
2268                                         substituted.push_back('\r');
2269                                         break;
2270                                 case 't':
2271                                         substituted.push_back('\t');
2272                                         break;
2273                                 case 'v':
2274                                         substituted.push_back('\v');
2275                                         break;
2276                                 case '\\':
2277                                         substituted.push_back('\\');
2278                                         break;
2279                                 default:
2280                                         substituted.push_back('\\');
2281                                         j--;
2282                                 }
2283                         }
2284                 } else {
2285                         substituted.push_back(text[j]);
2286                 }
2287         }
2288         *length = static_cast<int>(substituted.length());
2289         return substituted.c_str();
2290 }
2291
2292 #ifndef SCI_OWNREGEX
2293
2294 #ifdef SCI_NAMESPACE
2295
2296 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2297         return new BuiltinRegex(charClassTable);
2298 }
2299
2300 #else
2301
2302 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2303         return new BuiltinRegex(charClassTable);
2304 }
2305
2306 #endif
2307
2308 #endif