ext/scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <assert.h>
  12 #include <ctype.h>
  13
  14 #include <string>
  15 #include <vector>
  16 #include <algorithm>
  17
  18 #include "Platform.h"
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22
  23 #include "CharacterSet.h"
  24 #include "SplitVector.h"
  25 #include "Partitioning.h"
  26 #include "RunStyles.h"
  27 #include "CellBuffer.h"
  28 #include "PerLine.h"
  29 #include "CharClassify.h"
  30 #include "Decoration.h"
  31 #include "CaseFolder.h"
  32 #include "Document.h"
  33 #include "RESearch.h"
  34 #include "UniConversion.h"
  35
  36 #ifdef SCI_NAMESPACE
  37 using namespace Scintilla;
  38 #endif
  39
  40 static inline bool IsPunctuation(char ch) {
  41         return IsASCII(ch) && ispunct(ch);
  42 }
  43
  44 void LexInterface::Colourise(int start, int end) {
  45         if (pdoc && instance && !performingStyle) {
  46                 // Protect against reentrance, which may occur, for example, when
  47                 // fold points are discovered while performing styling and the folding
  48                 // code looks for child lines which may trigger styling.
  49                 performingStyle = true;
  50
  51                 int lengthDoc = pdoc->Length();
  52                 if (end == -1)
  53                         end = lengthDoc;
  54                 int len = end - start;
  55
  56                 PLATFORM_ASSERT(len >= 0);
  57                 PLATFORM_ASSERT(start + len <= lengthDoc);
  58
  59                 int styleStart = 0;
  60                 if (start > 0)
  61                         styleStart = pdoc->StyleAt(start - 1);
  62
  63                 if (len > 0) {
  64                         instance->Lex(start, len, styleStart, pdoc);
  65                         instance->Fold(start, len, styleStart, pdoc);
  66                 }
  67
  68                 performingStyle = false;
  69         }
  70 }
  71
  72 int LexInterface::LineEndTypesSupported() {
  73         if (instance) {
  74                 int interfaceVersion = instance->Version();
  75                 if (interfaceVersion >= lvSubStyles) {
  76                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  77                         return ssinstance->LineEndTypesSupported();
  78                 }
  79         }
  80         return 0;
  81 }
  82
  83 Document::Document() {
  84         refCount = 0;
  85         pcf = NULL;
  86 #ifdef _WIN32
  87         eolMode = SC_EOL_CRLF;
  88 #else
  89         eolMode = SC_EOL_LF;
  90 #endif
  91         dbcsCodePage = 0;
  92         lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
  93         endStyled = 0;
  94         styleClock = 0;
  95         enteredModification = 0;
  96         enteredStyling = 0;
  97         enteredReadOnlyCount = 0;
  98         insertionSet = false;
  99         tabInChars = 8;
 100         indentInChars = 0;
 101         actualIndentInChars = 8;
 102         useTabs = true;
 103         tabIndents = true;
 104         backspaceUnindents = false;
 105
 106         matchesValid = false;
 107         regex = 0;
 108
 109         UTF8BytesOfLeadInitialise();
 110
 111         perLineData[ldMarkers] = new LineMarkers();
 112         perLineData[ldLevels] = new LineLevels();
 113         perLineData[ldState] = new LineState();
 114         perLineData[ldMargin] = new LineAnnotation();
 115         perLineData[ldAnnotation] = new LineAnnotation();
 116
 117         cb.SetPerLine(this);
 118
 119         pli = 0;
 120 }
 121
 122 Document::~Document() {
 123         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 124                 it->watcher->NotifyDeleted(this, it->userData);
 125         }
 126         for (int j=0; j<ldSize; j++) {
 127                 delete perLineData[j];
 128                 perLineData[j] = 0;
 129         }
 130         delete regex;
 131         regex = 0;
 132         delete pli;
 133         pli = 0;
 134         delete pcf;
 135         pcf = 0;
 136 }
 137
 138 void Document::Init() {
 139         for (int j=0; j<ldSize; j++) {
 140                 if (perLineData[j])
 141                         perLineData[j]->Init();
 142         }
 143 }
 144
 145 int Document::LineEndTypesSupported() const {
 146         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 147                 return pli->LineEndTypesSupported();
 148         else
 149                 return 0;
 150 }
 151
 152 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 153         if (dbcsCodePage != dbcsCodePage_) {
 154                 dbcsCodePage = dbcsCodePage_;
 155                 SetCaseFolder(NULL);
 156                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 157                 return true;
 158         } else {
 159                 return false;
 160         }
 161 }
 162
 163 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 164         if (lineEndBitSet != lineEndBitSet_) {
 165                 lineEndBitSet = lineEndBitSet_;
 166                 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 167                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 168                         ModifiedAt(0);
 169                         cb.SetLineEndTypes(lineEndBitSetActive);
 170                         return true;
 171                 } else {
 172                         return false;
 173                 }
 174         } else {
 175                 return false;
 176         }
 177 }
 178
 179 void Document::InsertLine(int line) {
 180         for (int j=0; j<ldSize; j++) {
 181                 if (perLineData[j])
 182                         perLineData[j]->InsertLine(line);
 183         }
 184 }
 185
 186 void Document::RemoveLine(int line) {
 187         for (int j=0; j<ldSize; j++) {
 188                 if (perLineData[j])
 189                         perLineData[j]->RemoveLine(line);
 190         }
 191 }
 192
 193 // Increase reference count and return its previous value.
 194 int Document::AddRef() {
 195         return refCount++;
 196 }
 197
 198 // Decrease reference count and return its previous value.
 199 // Delete the document if reference count reaches zero.
 200 int SCI_METHOD Document::Release() {
 201         int curRefCount = --refCount;
 202         if (curRefCount == 0)
 203                 delete this;
 204         return curRefCount;
 205 }
 206
 207 void Document::SetSavePoint() {
 208         cb.SetSavePoint();
 209         NotifySavePoint(true);
 210 }
 211
 212 int Document::GetMark(int line) {
 213         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 214 }
 215
 216 int Document::MarkerNext(int lineStart, int mask) const {
 217         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 218 }
 219
 220 int Document::AddMark(int line, int markerNum) {
 221         if (line >= 0 && line <= LinesTotal()) {
 222                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 223                         AddMark(line, markerNum, LinesTotal());
 224                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 225                 NotifyModified(mh);
 226                 return prev;
 227         } else {
 228                 return 0;
 229         }
 230 }
 231
 232 void Document::AddMarkSet(int line, int valueSet) {
 233         if (line < 0 || line > LinesTotal()) {
 234                 return;
 235         }
 236         unsigned int m = valueSet;
 237         for (int i = 0; m; i++, m >>= 1)
 238                 if (m & 1)
 239                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 240                                 AddMark(line, i, LinesTotal());
 241         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 242         NotifyModified(mh);
 243 }
 244
 245 void Document::DeleteMark(int line, int markerNum) {
 246         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 247         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 248         NotifyModified(mh);
 249 }
 250
 251 void Document::DeleteMarkFromHandle(int markerHandle) {
 252         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 253         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 254         mh.line = -1;
 255         NotifyModified(mh);
 256 }
 257
 258 void Document::DeleteAllMarks(int markerNum) {
 259         bool someChanges = false;
 260         for (int line = 0; line < LinesTotal(); line++) {
 261                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 262                         someChanges = true;
 263         }
 264         if (someChanges) {
 265                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 266                 mh.line = -1;
 267                 NotifyModified(mh);
 268         }
 269 }
 270
 271 int Document::LineFromHandle(int markerHandle) {
 272         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 273 }
 274
 275 int SCI_METHOD Document::LineStart(int line) const {
 276         return cb.LineStart(line);
 277 }
 278
 279 int SCI_METHOD Document::LineEnd(int line) const {
 280         if (line >= LinesTotal() - 1) {
 281                 return LineStart(line + 1);
 282         } else {
 283                 int position = LineStart(line + 1);
 284                 if (SC_CP_UTF8 == dbcsCodePage) {
 285                         unsigned char bytes[] = {
 286                                 static_cast<unsigned char>(cb.CharAt(position-3)),
 287                                 static_cast<unsigned char>(cb.CharAt(position-2)),
 288                                 static_cast<unsigned char>(cb.CharAt(position-1)),
 289                         };
 290                         if (UTF8IsSeparator(bytes)) {
 291                                 return position - UTF8SeparatorLength;
 292                         }
 293                         if (UTF8IsNEL(bytes+1)) {
 294                                 return position - UTF8NELLength;
 295                         }
 296                 }
 297                 position--; // Back over CR or LF
 298                 // When line terminator is CR+LF, may need to go back one more
 299                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 300                         position--;
 301                 }
 302                 return position;
 303         }
 304 }
 305
 306 void SCI_METHOD Document::SetErrorStatus(int status) {
 307         // Tell the watchers an error has occurred.
 308         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 309                 it->watcher->NotifyErrorOccurred(this, it->userData, status);
 310         }
 311 }
 312
 313 int SCI_METHOD Document::LineFromPosition(int pos) const {
 314         return cb.LineFromPosition(pos);
 315 }
 316
 317 int Document::LineEndPosition(int position) const {
 318         return LineEnd(LineFromPosition(position));
 319 }
 320
 321 bool Document::IsLineEndPosition(int position) const {
 322         return LineEnd(LineFromPosition(position)) == position;
 323 }
 324
 325 bool Document::IsPositionInLineEnd(int position) const {
 326         return position >= LineEnd(LineFromPosition(position));
 327 }
 328
 329 int Document::VCHomePosition(int position) const {
 330         int line = LineFromPosition(position);
 331         int startPosition = LineStart(line);
 332         int endLine = LineEnd(line);
 333         int startText = startPosition;
 334         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 335                 startText++;
 336         if (position == startText)
 337                 return startPosition;
 338         else
 339                 return startText;
 340 }
 341
 342 int SCI_METHOD Document::SetLevel(int line, int level) {
 343         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 344         if (prev != level) {
 345                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 346                                    LineStart(line), 0, 0, 0, line);
 347                 mh.foldLevelNow = level;
 348                 mh.foldLevelPrev = prev;
 349                 NotifyModified(mh);
 350         }
 351         return prev;
 352 }
 353
 354 int SCI_METHOD Document::GetLevel(int line) const {
 355         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 356 }
 357
 358 void Document::ClearLevels() {
 359         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 360 }
 361
 362 static bool IsSubordinate(int levelStart, int levelTry) {
 363         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 364                 return true;
 365         else
 366                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 367 }
 368
 369 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 370         if (level == -1)
 371                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 372         int maxLine = LinesTotal();
 373         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 374         int lineMaxSubord = lineParent;
 375         while (lineMaxSubord < maxLine - 1) {
 376                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 377                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 378                         break;
 379                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 380                         break;
 381                 lineMaxSubord++;
 382         }
 383         if (lineMaxSubord > lineParent) {
 384                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 385                         // Have chewed up some whitespace that belongs to a parent so seek back
 386                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 387                                 lineMaxSubord--;
 388                         }
 389                 }
 390         }
 391         return lineMaxSubord;
 392 }
 393
 394 int Document::GetFoldParent(int line) const {
 395         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 396         int lineLook = line - 1;
 397         while ((lineLook > 0) && (
 398                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 399                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 400               ) {
 401                 lineLook--;
 402         }
 403         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 404                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 405                 return lineLook;
 406         } else {
 407                 return -1;
 408         }
 409 }
 410
 411 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
 412         int level = GetLevel(line);
 413         int lookLastLine = Platform::Maximum(line, lastLine) + 1;
 414
 415         int lookLine = line;
 416         int lookLineLevel = level;
 417         int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 418         while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
 419                 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
 420                 lookLineLevel = GetLevel(--lookLine);
 421                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 422         }
 423
 424         int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
 425         if (beginFoldBlock == -1) {
 426                 highlightDelimiter.Clear();
 427                 return;
 428         }
 429
 430         int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
 431         int firstChangeableLineBefore = -1;
 432         if (endFoldBlock < line) {
 433                 lookLine = beginFoldBlock - 1;
 434                 lookLineLevel = GetLevel(lookLine);
 435                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 436                 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
 437                         if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
 438                                 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
 439                                         beginFoldBlock = lookLine;
 440                                         endFoldBlock = line;
 441                                         firstChangeableLineBefore = line - 1;
 442                                 }
 443                         }
 444                         if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
 445                                 break;
 446                         lookLineLevel = GetLevel(--lookLine);
 447                         lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 448                 }
 449         }
 450         if (firstChangeableLineBefore == -1) {
 451                 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 452                         lookLine >= beginFoldBlock;
 453                         lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 454                         if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
 455                                 firstChangeableLineBefore = lookLine;
 456                                 break;
 457                         }
 458                 }
 459         }
 460         if (firstChangeableLineBefore == -1)
 461                 firstChangeableLineBefore = beginFoldBlock - 1;
 462
 463         int firstChangeableLineAfter = -1;
 464         for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 465                 lookLine <= endFoldBlock;
 466                 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 467                 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
 468                         firstChangeableLineAfter = lookLine;
 469                         break;
 470                 }
 471         }
 472         if (firstChangeableLineAfter == -1)
 473                 firstChangeableLineAfter = endFoldBlock + 1;
 474
 475         highlightDelimiter.beginFoldBlock = beginFoldBlock;
 476         highlightDelimiter.endFoldBlock = endFoldBlock;
 477         highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
 478         highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
 479 }
 480
 481 int Document::ClampPositionIntoDocument(int pos) const {
 482         return Platform::Clamp(pos, 0, Length());
 483 }
 484
 485 bool Document::IsCrLf(int pos) const {
 486         if (pos < 0)
 487                 return false;
 488         if (pos >= (Length() - 1))
 489                 return false;
 490         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 491 }
 492
 493 int Document::LenChar(int pos) {
 494         if (pos < 0) {
 495                 return 1;
 496         } else if (IsCrLf(pos)) {
 497                 return 2;
 498         } else if (SC_CP_UTF8 == dbcsCodePage) {
 499                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 500                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 501                 int lengthDoc = Length();
 502                 if ((pos + widthCharBytes) > lengthDoc)
 503                         return lengthDoc - pos;
 504                 else
 505                         return widthCharBytes;
 506         } else if (dbcsCodePage) {
 507                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 508         } else {
 509                 return 1;
 510         }
 511 }
 512
 513 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 514         int trail = pos;
 515         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 516                 trail--;
 517         start = (trail > 0) ? trail-1 : trail;
 518
 519         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 520         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 521         if (widthCharBytes == 1) {
 522                 return false;
 523         } else {
 524                 int trailBytes = widthCharBytes - 1;
 525                 int len = pos - start;
 526                 if (len > trailBytes)
 527                         // pos too far from lead
 528                         return false;
 529                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 530                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 531                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 532                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 533                 if (utf8status & UTF8MaskInvalid)
 534                         return false;
 535                 end = start + widthCharBytes;
 536                 return true;
 537         }
 538 }
 539
 540 // Normalise a position so that it is not halfway through a two byte character.
 541 // This can occur in two situations -
 542 // When lines are terminated with \r\n pairs which should be treated as one character.
 543 // When displaying DBCS text such as Japanese.
 544 // If moving, move the position in the indicated direction.
 545 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
 546         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 547         // If out of range, just return minimum/maximum value.
 548         if (pos <= 0)
 549                 return 0;
 550         if (pos >= Length())
 551                 return Length();
 552
 553         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 554         if (checkLineEnd && IsCrLf(pos - 1)) {
 555                 if (moveDir > 0)
 556                         return pos + 1;
 557                 else
 558                         return pos - 1;
 559         }
 560
 561         if (dbcsCodePage) {
 562                 if (SC_CP_UTF8 == dbcsCodePage) {
 563                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 564                         // If ch is not a trail byte then pos is valid intercharacter position
 565                         if (UTF8IsTrailByte(ch)) {
 566                                 int startUTF = pos;
 567                                 int endUTF = pos;
 568                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 569                                         // ch is a trail byte within a UTF-8 character
 570                                         if (moveDir > 0)
 571                                                 pos = endUTF;
 572                                         else
 573                                                 pos = startUTF;
 574                                 }
 575                                 // Else invalid UTF-8 so return position of isolated trail byte
 576                         }
 577                 } else {
 578                         // Anchor DBCS calculations at start of line because start of line can
 579                         // not be a DBCS trail byte.
 580                         int posStartLine = LineStart(LineFromPosition(pos));
 581                         if (pos == posStartLine)
 582                                 return pos;
 583
 584                         // Step back until a non-lead-byte is found.
 585                         int posCheck = pos;
 586                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 587                                 posCheck--;
 588
 589                         // Check from known start of character.
 590                         while (posCheck < pos) {
 591                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 592                                 if (posCheck + mbsize == pos) {
 593                                         return pos;
 594                                 } else if (posCheck + mbsize > pos) {
 595                                         if (moveDir > 0) {
 596                                                 return posCheck + mbsize;
 597                                         } else {
 598                                                 return posCheck;
 599                                         }
 600                                 }
 601                                 posCheck += mbsize;
 602                         }
 603                 }
 604         }
 605
 606         return pos;
 607 }
 608
 609 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 610 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 611 // A \r\n pair is treated as two characters.
 612 int Document::NextPosition(int pos, int moveDir) const {
 613         // If out of range, just return minimum/maximum value.
 614         int increment = (moveDir > 0) ? 1 : -1;
 615         if (pos + increment <= 0)
 616                 return 0;
 617         if (pos + increment >= Length())
 618                 return Length();
 619
 620         if (dbcsCodePage) {
 621                 if (SC_CP_UTF8 == dbcsCodePage) {
 622                         if (increment == 1) {
 623                                 // Simple forward movement case so can avoid some checks
 624                                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 625                                 if (UTF8IsAscii(leadByte)) {
 626                                         // Single byte character or invalid
 627                                         pos++;
 628                                 } else {
 629                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 630                                         char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 631                                         for (int b=1; b<widthCharBytes; b++)
 632                                                 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
 633                                         int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 634                                         if (utf8status & UTF8MaskInvalid)
 635                                                 pos++;
 636                                         else
 637                                                 pos += utf8status & UTF8MaskWidth;
 638                                 }
 639                         } else {
 640                                 // Examine byte before position
 641                                 pos--;
 642                                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 643                                 // If ch is not a trail byte then pos is valid intercharacter position
 644                                 if (UTF8IsTrailByte(ch)) {
 645                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 646                                         int startUTF = pos;
 647                                         int endUTF = pos;
 648                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 649                                                 pos = startUTF;
 650                                         }
 651                                         // Else invalid UTF-8 so return position of isolated trail byte
 652                                 }
 653                         }
 654                 } else {
 655                         if (moveDir > 0) {
 656                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 657                                 pos += mbsize;
 658                                 if (pos > Length())
 659                                         pos = Length();
 660                         } else {
 661                                 // Anchor DBCS calculations at start of line because start of line can
 662                                 // not be a DBCS trail byte.
 663                                 int posStartLine = LineStart(LineFromPosition(pos));
 664                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 665                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 666                                 if ((pos - 1) <= posStartLine) {
 667                                         return pos - 1;
 668                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 669                                         // Must actually be trail byte
 670                                         return pos - 2;
 671                                 } else {
 672                                         // Otherwise, step back until a non-lead-byte is found.
 673                                         int posTemp = pos - 1;
 674                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 675                                                 ;
 676                                         // Now posTemp+1 must point to the beginning of a character,
 677                                         // so figure out whether we went back an even or an odd
 678                                         // number of bytes and go back 1 or 2 bytes, respectively.
 679                                         return (pos - 1 - ((pos - posTemp) & 1));
 680                                 }
 681                         }
 682                 }
 683         } else {
 684                 pos += increment;
 685         }
 686
 687         return pos;
 688 }
 689
 690 bool Document::NextCharacter(int &pos, int moveDir) const {
 691         // Returns true if pos changed
 692         int posNext = NextPosition(pos, moveDir);
 693         if (posNext == pos) {
 694                 return false;
 695         } else {
 696                 pos = posNext;
 697                 return true;
 698         }
 699 }
 700
 701 static inline int UnicodeFromBytes(const unsigned char *us) {
 702         if (us[0] < 0xC2) {
 703                 return us[0];
 704         } else if (us[0] < 0xE0) {
 705                 return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
 706         } else if (us[0] < 0xF0) {
 707                 return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
 708         } else if (us[0] < 0xF5) {
 709                 return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
 710         }
 711         return us[0];
 712 }
 713
 714 // Return -1  on out-of-bounds
 715 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
 716         int pos = positionStart;
 717         if (dbcsCodePage) {
 718                 const int increment = (characterOffset > 0) ? 1 : -1;
 719                 while (characterOffset != 0) {
 720                         const int posNext = NextPosition(pos, increment);
 721                         if (posNext == pos)
 722                                 return INVALID_POSITION;
 723                         pos = posNext;
 724                         characterOffset -= increment;
 725                 }
 726         } else {
 727                 pos = positionStart + characterOffset;
 728                 if ((pos < 0) || (pos > Length()))
 729                         return INVALID_POSITION;
 730         }
 731         return pos;
 732 }
 733
 734 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
 735         int character;
 736         int bytesInCharacter = 1;
 737         if (dbcsCodePage) {
 738                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 739                 if (SC_CP_UTF8 == dbcsCodePage) {
 740                         if (UTF8IsAscii(leadByte)) {
 741                                 // Single byte character or invalid
 742                                 character =  leadByte;
 743                         } else {
 744                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 745                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 746                                 for (int b=1; b<widthCharBytes; b++)
 747                                         charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 748                                 int utf8status = UTF8Classify(charBytes, widthCharBytes);
 749                                 if (utf8status & UTF8MaskInvalid) {
 750                                         // Report as singleton surrogate values which are invalid Unicode
 751                                         character =  0xDC80 + leadByte;
 752                                 } else {
 753                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 754                                         character = UnicodeFromBytes(charBytes);
 755                                 }
 756                         }
 757                 } else {
 758                         if (IsDBCSLeadByte(leadByte)) {
 759                                 bytesInCharacter = 2;
 760                                 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 761                         } else {
 762                                 character = leadByte;
 763                         }
 764                 }
 765         } else {
 766                 character = cb.CharAt(position);
 767         }
 768         if (pWidth) {
 769                 *pWidth = bytesInCharacter;
 770         }
 771         return character;
 772 }
 773
// Return the code page value held in dbcsCodePage.
// Elsewhere in this file 0 is treated as a single byte encoding and
// SC_CP_UTF8 as UTF-8; other values are handled as DBCS code pages.
int SCI_METHOD Document::CodePage() const {
        return dbcsCodePage;
}
 777
 778 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 779         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 780         unsigned char uch = static_cast<unsigned char>(ch);
 781         switch (dbcsCodePage) {
 782                 case 932:
 783                         // Shift_jis
 784                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 785                                 ((uch >= 0xE0) && (uch <= 0xFC));
 786                                 // Lead bytes F0 to FC may be a Microsoft addition.
 787                 case 936:
 788                         // GBK
 789                         return (uch >= 0x81) && (uch <= 0xFE);
 790                 case 949:
 791                         // Korean Wansung KS C-5601-1987
 792                         return (uch >= 0x81) && (uch <= 0xFE);
 793                 case 950:
 794                         // Big5
 795                         return (uch >= 0x81) && (uch <= 0xFE);
 796                 case 1361:
 797                         // Korean Johab KS C-5601-1992
 798                         return
 799                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 800                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 801                                 ((uch >= 0xE0) && (uch <= 0xF9));
 802         }
 803         return false;
 804 }
 805
 806 static inline bool IsSpaceOrTab(int ch) {
 807         return ch == ' ' || ch == '\t';
 808 }
 809
 810 // Need to break text into segments near lengthSegment but taking into
 811 // account the encoding to not break inside a UTF-8 or DBCS character
 812 // and also trying to avoid breaking inside a pair of combining characters.
 813 // The segment length must always be long enough (more than 4 bytes)
 814 // so that there will be at least one whole character to make a segment.
 815 // For UTF-8, text must consist only of valid whole characters.
 816 // In preference order from best to worst:
 817 //   1) Break after space
 818 //   2) Break before punctuation
 819 //   3) Break after whole character
 820
 821 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
 822         if (length <= lengthSegment)
 823                 return length;
 824         int lastSpaceBreak = -1;
 825         int lastPunctuationBreak = -1;
 826         int lastEncodingAllowedBreak = 0;
 827         for (int j=0; j < lengthSegment;) {
 828                 unsigned char ch = static_cast<unsigned char>(text[j]);
 829                 if (j > 0) {
 830                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 831                                 lastSpaceBreak = j;
 832                         }
 833                         if (ch < 'A') {
 834                                 lastPunctuationBreak = j;
 835                         }
 836                 }
 837                 lastEncodingAllowedBreak = j;
 838
 839                 if (dbcsCodePage == SC_CP_UTF8) {
 840                         j += UTF8BytesOfLead[ch];
 841                 } else if (dbcsCodePage) {
 842                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 843                 } else {
 844                         j++;
 845                 }
 846         }
 847         if (lastSpaceBreak >= 0) {
 848                 return lastSpaceBreak;
 849         } else if (lastPunctuationBreak >= 0) {
 850                 return lastPunctuationBreak;
 851         }
 852         return lastEncodingAllowedBreak;
 853 }
 854
 855 EncodingFamily Document::CodePageFamily() const {
 856         if (SC_CP_UTF8 == dbcsCodePage)
 857                 return efUnicode;
 858         else if (dbcsCodePage)
 859                 return efDBCS;
 860         else
 861                 return efEightBit;
 862 }
 863
 864 void Document::ModifiedAt(int pos) {
 865         if (endStyled > pos)
 866                 endStyled = pos;
 867 }
 868
 869 void Document::CheckReadOnly() {
 870         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 871                 enteredReadOnlyCount++;
 872                 NotifyModifyAttempt();
 873                 enteredReadOnlyCount--;
 874         }
 875 }
 876
 877 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 878 // SetStyleAt does not change the persistent state of a document
 879
 880 bool Document::DeleteChars(int pos, int len) {
 881         if (len <= 0)
 882                 return false;
 883         if ((pos + len) > Length())
 884                 return false;
 885         CheckReadOnly();
 886         if (enteredModification != 0) {
 887                 return false;
 888         } else {
 889                 enteredModification++;
 890                 if (!cb.IsReadOnly()) {
 891                         NotifyModified(
 892                             DocModification(
 893                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 894                                 pos, len,
 895                                 0, 0));
 896                         int prevLinesTotal = LinesTotal();
 897                         bool startSavePoint = cb.IsSavePoint();
 898                         bool startSequence = false;
 899                         const char *text = cb.DeleteChars(pos, len, startSequence);
 900                         if (startSavePoint && cb.IsCollectingUndo())
 901                                 NotifySavePoint(!startSavePoint);
 902                         if ((pos < Length()) || (pos == 0))
 903                                 ModifiedAt(pos);
 904                         else
 905                                 ModifiedAt(pos-1);
 906                         NotifyModified(
 907                             DocModification(
 908                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 909                                 pos, len,
 910                                 LinesTotal() - prevLinesTotal, text));
 911                 }
 912                 enteredModification--;
 913         }
 914         return !cb.IsReadOnly();
 915 }
 916
 917 /**
 918  * Insert a string with a length.
 919  */
 920 int Document::InsertString(int position, const char *s, int insertLength) {
 921         if (insertLength <= 0) {
 922                 return 0;
 923         }
 924         CheckReadOnly();        // Application may change read only state here
 925         if (cb.IsReadOnly()) {
 926                 return 0;
 927         }
 928         if (enteredModification != 0) {
 929                 return 0;
 930         }
 931         enteredModification++;
 932         insertionSet = false;
 933         insertion.clear();
 934         NotifyModified(
 935                 DocModification(
 936                         SC_MOD_INSERTCHECK,
 937                         position, insertLength,
 938                         0, s));
 939         if (insertionSet) {
 940                 s = insertion.c_str();
 941                 insertLength = static_cast<int>(insertion.length());
 942         }
 943         NotifyModified(
 944                 DocModification(
 945                         SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
 946                         position, insertLength,
 947                         0, s));
 948         int prevLinesTotal = LinesTotal();
 949         bool startSavePoint = cb.IsSavePoint();
 950         bool startSequence = false;
 951         const char *text = cb.InsertString(position, s, insertLength, startSequence);
 952         if (startSavePoint && cb.IsCollectingUndo())
 953                 NotifySavePoint(!startSavePoint);
 954         ModifiedAt(position);
 955         NotifyModified(
 956                 DocModification(
 957                         SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 958                         position, insertLength,
 959                         LinesTotal() - prevLinesTotal, text));
 960         if (insertionSet) {     // Free memory as could be large
 961                 std::string().swap(insertion);
 962         }
 963         enteredModification--;
 964         return insertLength;
 965 }
 966
// Record replacement text for an insertion that is being checked.
// InsertString looks at insertionSet after its SC_MOD_INSERTCHECK notification
// and, when set, inserts the copy of s (length bytes) stored here instead.
void Document::ChangeInsertion(const char *s, int length) {
        insertionSet = true;
        insertion.assign(s, length);
}
 971
 972 int SCI_METHOD Document::AddData(char *data, int length) {
 973         try {
 974                 int position = Length();
 975                 InsertString(position, data, length);
 976         } catch (std::bad_alloc &) {
 977                 return SC_STATUS_BADALLOC;
 978         } catch (...) {
 979                 return SC_STATUS_FAILURE;
 980         }
 981         return 0;
 982 }
 983
// Return this object as an untyped pointer.
void * SCI_METHOD Document::ConvertToDocument() {
        return this;
}
 987
/**
 * Undo the most recent group of actions held by the cell buffer.
 * Each step is announced with a "before" notification, performed, and then
 * announced again with flags describing what was done.
 * Returns a position derived from the last non-container step (after the
 * restored text when that step undid a removal), or -1 when nothing was undone.
 */
int Document::Undo() {
        int newPos = -1;
        CheckReadOnly();
        // Nothing happens when already inside a modification or undo is not being collected
        if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        bool multiLine = false;
                        int steps = cb.StartUndo();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        // Track runs of restored removals that touch each other so that
                        // newPos can be placed after the whole restored run
                        int coalescedRemovePos = -1;
                        int coalescedRemoveLen = 0;
                        int prevRemoveActionPos = -1;
                        int prevRemoveActionLen = 0;
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetUndoStep();
                                // Undoing a removal puts text back, so it is announced as an insertion
                                if (action.at == removeAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
                                } else if (action.at == containerAction) {
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                        // A container action that may not coalesce ends any run of removals
                                        if (!action.mayCoalesce) {
                                                coalescedRemovePos = -1;
                                                coalescedRemoveLen = 0;
                                                prevRemoveActionPos = -1;
                                                prevRemoveActionLen = 0;
                                        }
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
                                }
                                cb.PerformUndoStep();
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                        newPos = action.position;
                                }

                                int modFlags = SC_PERFORMED_UNDO;
                                // With undo, a remove action is notified as an insertion
                                // and an insert action is notified as a deletion
                                if (action.at == removeAction) {
                                        newPos += action.lenData;
                                        modFlags |= SC_MOD_INSERTTEXT;
                                        // Extend the run when this removal starts at the same place as
                                        // the previous one or directly after it
                                        if ((coalescedRemoveLen > 0) &&
                                                (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
                                                coalescedRemoveLen += action.lenData;
                                                newPos = coalescedRemovePos + coalescedRemoveLen;
                                        } else {
                                                coalescedRemovePos = action.position;
                                                coalescedRemoveLen = action.lenData;
                                        }
                                        prevRemoveActionPos = action.position;
                                        prevRemoveActionLen = action.lenData;
                                } else if (action.at == insertAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                        coalescedRemovePos = -1;
                                        coalescedRemoveLen = 0;
                                        prevRemoveActionPos = -1;
                                        prevRemoveActionLen = 0;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step also reports whether any step changed the line count
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                                                                           linesAdded, action.data));
                        }

                        // Report a change of save point state caused by the undo
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);
                }
                enteredModification--;
        }
        return newPos;
}
1072
/**
 * Redo the next group of actions held by the cell buffer.
 * Each step is announced with a "before" notification, performed, and then
 * announced again with flags describing what was done.
 * Returns the position of the last non-container step (after the inserted
 * text when that step was an insertion), or -1 when nothing was redone.
 */
int Document::Redo() {
        int newPos = -1;
        CheckReadOnly();
        // Nothing happens when already inside a modification or undo is not being collected
        if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        bool multiLine = false;
                        int steps = cb.StartRedo();
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetRedoStep();
                                if (action.at == insertAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
                                } else if (action.at == containerAction) {
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
                                }
                                cb.PerformRedoStep();
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                        newPos = action.position;
                                }

                                int modFlags = SC_PERFORMED_REDO;
                                // Unlike undo, redo reports each action as its own kind of change
                                if (action.at == insertAction) {
                                        newPos += action.lenData;
                                        modFlags |= SC_MOD_INSERTTEXT;
                                } else if (action.at == removeAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step also reports whether any step changed the line count
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(
                                        DocModification(modFlags, action.position, action.lenData,
                                                                        linesAdded, action.data));
                        }

                        // Report a change of save point state caused by the redo
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);
                }
                enteredModification--;
        }
        return newPos;
}
1132
1133 void Document::DelChar(int pos) {
1134         DeleteChars(pos, LenChar(pos));
1135 }
1136
1137 void Document::DelCharBack(int pos) {
1138         if (pos <= 0) {
1139                 return;
1140         } else if (IsCrLf(pos - 2)) {
1141                 DeleteChars(pos - 2, 2);
1142         } else if (dbcsCodePage) {
1143                 int startChar = NextPosition(pos, -1);
1144                 DeleteChars(startChar, pos - startChar);
1145         } else {
1146                 DeleteChars(pos - 1, 1);
1147         }
1148 }
1149
// Return the first multiple of tabSize that is strictly beyond the tab stop
// at or before pos. tabSize must not be zero.
static int NextTab(int pos, int tabSize) {
        const int intoTab = pos % tabSize;
        return pos - intoTab + tabSize;
}
1153
// Build the white space that produces an indentation of indent columns.
// When insertSpaces is false, as many tabs as fit (one per tabSize columns)
// are used and the remainder is made up with spaces; otherwise only spaces
// are used. A non-positive indent gives an empty string.
// A non-positive tabSize can not be satisfied with tabs, so only spaces are
// produced in that case instead of looping forever.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
        std::string indentation;
        if (indent <= 0) {
                return indentation;
        }
        int tabs = 0;
        if (!insertSpaces && (tabSize > 0)) {
                tabs = indent / tabSize;
        }
        const int spaces = indent - (tabs * tabSize);
        indentation.reserve(static_cast<std::string::size_type>(tabs + spaces));
        indentation.append(static_cast<std::string::size_type>(tabs), '\t');
        indentation.append(static_cast<std::string::size_type>(spaces), ' ');
        return indentation;
}
1168
1169 int SCI_METHOD Document::GetLineIndentation(int line) {
1170         int indent = 0;
1171         if ((line >= 0) && (line < LinesTotal())) {
1172                 int lineStart = LineStart(line);
1173                 int length = Length();
1174                 for (int i = lineStart; i < length; i++) {
1175                         char ch = cb.CharAt(i);
1176                         if (ch == ' ')
1177                                 indent++;
1178                         else if (ch == '\t')
1179                                 indent = NextTab(indent, tabInChars);
1180                         else
1181                                 return indent;
1182                 }
1183         }
1184         return indent;
1185 }
1186
1187 int Document::SetLineIndentation(int line, int indent) {
1188         int indentOfLine = GetLineIndentation(line);
1189         if (indent < 0)
1190                 indent = 0;
1191         if (indent != indentOfLine) {
1192                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1193                 int thisLineStart = LineStart(line);
1194                 int indentPos = GetLineIndentPosition(line);
1195                 UndoGroup ug(this);
1196                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1197                 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1198                         static_cast<int>(linebuf.length()));
1199         } else {
1200                 return GetLineIndentPosition(line);
1201         }
1202 }
1203
1204 int Document::GetLineIndentPosition(int line) const {
1205         if (line < 0)
1206                 return 0;
1207         int pos = LineStart(line);
1208         int length = Length();
1209         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1210                 pos++;
1211         }
1212         return pos;
1213 }
1214
1215 int Document::GetColumn(int pos) {
1216         int column = 0;
1217         int line = LineFromPosition(pos);
1218         if ((line >= 0) && (line < LinesTotal())) {
1219                 for (int i = LineStart(line); i < pos;) {
1220                         char ch = cb.CharAt(i);
1221                         if (ch == '\t') {
1222                                 column = NextTab(column, tabInChars);
1223                                 i++;
1224                         } else if (ch == '\r') {
1225                                 return column;
1226                         } else if (ch == '\n') {
1227                                 return column;
1228                         } else if (i >= Length()) {
1229                                 return column;
1230                         } else {
1231                                 column++;
1232                                 i = NextPosition(i, 1);
1233                         }
1234                 }
1235         }
1236         return column;
1237 }
1238
1239 int Document::CountCharacters(int startPos, int endPos) {
1240         startPos = MovePositionOutsideChar(startPos, 1, false);
1241         endPos = MovePositionOutsideChar(endPos, -1, false);
1242         int count = 0;
1243         int i = startPos;
1244         while (i < endPos) {
1245                 count++;
1246                 if (IsCrLf(i))
1247                         i++;
1248                 i = NextPosition(i, 1);
1249         }
1250         return count;
1251 }
1252
1253 int Document::FindColumn(int line, int column) {
1254         int position = LineStart(line);
1255         if ((line >= 0) && (line < LinesTotal())) {
1256                 int columnCurrent = 0;
1257                 while ((columnCurrent < column) && (position < Length())) {
1258                         char ch = cb.CharAt(position);
1259                         if (ch == '\t') {
1260                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1261                                 if (columnCurrent > column)
1262                                         return position;
1263                                 position++;
1264                         } else if (ch == '\r') {
1265                                 return position;
1266                         } else if (ch == '\n') {
1267                                 return position;
1268                         } else {
1269                                 columnCurrent++;
1270                                 position = NextPosition(position, 1);
1271                         }
1272                 }
1273         }
1274         return position;
1275 }
1276
1277 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1278         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1279         for (int line = lineBottom; line >= lineTop; line--) {
1280                 int indentOfLine = GetLineIndentation(line);
1281                 if (forwards) {
1282                         if (LineStart(line) < LineEnd(line)) {
1283                                 SetLineIndentation(line, indentOfLine + IndentSize());
1284                         }
1285                 } else {
1286                         SetLineIndentation(line, indentOfLine - IndentSize());
1287                 }
1288         }
1289 }
1290
1291 // Convert line endings for a piece of text to a particular mode.
1292 // Stop at len or when a NUL is found.
1293 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1294         std::string dest;
1295         for (size_t i = 0; (i < len) && (s[i]); i++) {
1296                 if (s[i] == '\n' || s[i] == '\r') {
1297                         if (eolModeWanted == SC_EOL_CR) {
1298                                 dest.push_back('\r');
1299                         } else if (eolModeWanted == SC_EOL_LF) {
1300                                 dest.push_back('\n');
1301                         } else { // eolModeWanted == SC_EOL_CRLF
1302                                 dest.push_back('\r');
1303                                 dest.push_back('\n');
1304                         }
1305                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1306                                 i++;
1307                         }
1308                 } else {
1309                         dest.push_back(s[i]);
1310                 }
1311         }
1312         return dest;
1313 }
1314
// Rewrite every line end in the document to the form selected by eolModeSet.
// All changes are made inside one UndoGroup.
// pos is adjusted after each edit so that the loop increment continues with
// the character following the converted line end.
void Document::ConvertLineEnds(int eolModeSet) {
        UndoGroup ug(this);

        for (int pos = 0; pos < Length(); pos++) {
                if (cb.CharAt(pos) == '\r') {
                        if (cb.CharAt(pos + 1) == '\n') {
                                // CRLF
                                if (eolModeSet == SC_EOL_CR) {
                                        DeleteChars(pos + 1, 1); // Delete the LF
                                } else if (eolModeSet == SC_EOL_LF) {
                                        DeleteChars(pos, 1); // Delete the CR
                                } else {
                                        // Already CRLF: step over the LF
                                        pos++;
                                }
                        } else {
                                // CR
                                if (eolModeSet == SC_EOL_CRLF) {
                                        pos += InsertString(pos + 1, "\n", 1); // Insert LF
                                } else if (eolModeSet == SC_EOL_LF) {
                                        // The LF goes in front of the CR, which then sits at pos
                                        pos += InsertString(pos, "\n", 1); // Insert LF
                                        DeleteChars(pos, 1); // Delete CR
                                        pos--;
                                }
                        }
                } else if (cb.CharAt(pos) == '\n') {
                        // LF
                        if (eolModeSet == SC_EOL_CRLF) {
                                pos += InsertString(pos, "\r", 1); // Insert CR
                        } else if (eolModeSet == SC_EOL_CR) {
                                // The CR goes in front of the LF, which then sits at pos
                                pos += InsertString(pos, "\r", 1); // Insert CR
                                DeleteChars(pos, 1); // Delete LF
                                pos--;
                        }
                }
        }

}
1352
1353 bool Document::IsWhiteLine(int line) const {
1354         int currentChar = LineStart(line);
1355         int endLine = LineEnd(line);
1356         while (currentChar < endLine) {
1357                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1358                         return false;
1359                 }
1360                 ++currentChar;
1361         }
1362         return true;
1363 }
1364
1365 int Document::ParaUp(int pos) const {
1366         int line = LineFromPosition(pos);
1367         line--;
1368         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1369                 line--;
1370         }
1371         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1372                 line--;
1373         }
1374         line++;
1375         return LineStart(line);
1376 }
1377
1378 int Document::ParaDown(int pos) const {
1379         int line = LineFromPosition(pos);
1380         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1381                 line++;
1382         }
1383         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1384                 line++;
1385         }
1386         if (line < LinesTotal())
1387                 return LineStart(line);
1388         else // end of a document
1389                 return LineEnd(line-1);
1390 }
1391
1392 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1393         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1394                 return CharClassify::ccWord;
1395         return charClass.GetClass(ch);
1396 }
1397
1398 /**
1399  * Used by commmands that want to select whole words.
1400  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1401  */
1402 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1403         CharClassify::cc ccStart = CharClassify::ccWord;
1404         if (delta < 0) {
1405                 if (!onlyWordCharacters)
1406                         ccStart = WordCharClass(cb.CharAt(pos-1));
1407                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1408                         pos--;
1409         } else {
1410                 if (!onlyWordCharacters && pos < Length())
1411                         ccStart = WordCharClass(cb.CharAt(pos));
1412                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1413                         pos++;
1414         }
1415         return MovePositionOutsideChar(pos, delta, true);
1416 }
1417
1418 /**
1419  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1420  * (delta < 0).
1421  * This is looking for a transition between character classes although there is also some
1422  * additional movement to transit white space.
1423  * Used by cursor movement by word commands.
1424  */
1425 int Document::NextWordStart(int pos, int delta) {
1426         if (delta < 0) {
1427                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1428                         pos--;
1429                 if (pos > 0) {
1430                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1431                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1432                                 pos--;
1433                         }
1434                 }
1435         } else {
1436                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1437                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1438                         pos++;
1439                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1440                         pos++;
1441         }
1442         return pos;
1443 }
1444
1445 /**
1446  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1447  * (delta < 0).
1448  * This is looking for a transition between character classes although there is also some
1449  * additional movement to transit white space.
1450  * Used by cursor movement by word commands.
1451  */
1452 int Document::NextWordEnd(int pos, int delta) {
1453         if (delta < 0) {
1454                 if (pos > 0) {
1455                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1456                         if (ccStart != CharClassify::ccSpace) {
1457                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1458                                         pos--;
1459                                 }
1460                         }
1461                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1462                                 pos--;
1463                         }
1464                 }
1465         } else {
1466                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1467                         pos++;
1468                 }
1469                 if (pos < Length()) {
1470                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1471                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1472                                 pos++;
1473                         }
1474                 }
1475         }
1476         return pos;
1477 }
1478
1479 /**
1480  * Check that the character at the given position is a word or punctuation character and that
1481  * the previous character is of a different character class.
1482  */
1483 bool Document::IsWordStartAt(int pos) const {
1484         if (pos > 0) {
1485                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1486                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1487                         (ccPos != WordCharClass(CharAt(pos - 1)));
1488         }
1489         return true;
1490 }
1491
1492 /**
1493  * Check that the character at the given position is a word or punctuation character and that
1494  * the next character is of a different character class.
1495  */
1496 bool Document::IsWordEndAt(int pos) const {
1497         if (pos < Length()) {
1498                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1499                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1500                         (ccPrev != WordCharClass(CharAt(pos)));
1501         }
1502         return true;
1503 }
1504
1505 /**
1506  * Check that the given range is has transitions between character classes at both
1507  * ends and where the characters on the inside are word or punctuation characters.
1508  */
1509 bool Document::IsWordAt(int start, int end) const {
1510         return IsWordStartAt(start) && IsWordEndAt(end);
1511 }
1512
1513 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1514         return (!word && !wordStart) ||
1515                         (word && IsWordAt(pos, pos + length)) ||
1516                         (wordStart && IsWordStartAt(pos));
1517 }
1518
1519 bool Document::HasCaseFolder(void) const {
1520         return pcf != 0;
1521 }
1522
1523 void Document::SetCaseFolder(CaseFolder *pcf_) {
1524         delete pcf;
1525         pcf = pcf_;
1526 }
1527
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos        Position where the search starts.
 * @param maxPos        Position where the search stops; smaller than minPos for a backward search.
 * @param search        Bytes to look for; *length of them are used.
 * @param caseSensitive When true bytes are compared exactly, otherwise both sides are
 *                      case folded through pcf before comparison.
 * @param word          Accept only matches for which IsWordAt holds.
 * @param wordStart     Accept only matches for which IsWordStartAt holds.
 * @param regExp        Delegate the whole search to the regex object, created on first use.
 * @param flags         Passed through to the regex search only.
 * @param length        In: length of search. Out: for case-insensitive UTF-8 and DBCS
 *                      matches, the number of document bytes matched.
 * @return Position of the match, minPos when *length <= 0, or -1 when nothing is found.
 *         For regExp the result of the regex search is returned unchanged.
 *
 * NOTE(review): the case-insensitive branches dereference pcf without a null check;
 * presumably callers install a case folder first (see HasCaseFolder) - confirm.
 */
long Document::FindText(int minPos, int maxPos, const char *search,
                        bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
                        int *length) {
        // Nothing to look for: report the start of the range.
        if (*length <= 0)
                return minPos;
        if (regExp) {
                // Lazily create the regular expression searcher.
                if (!regex)
                        regex = CreateRegexSearch(&charClass);
                return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
        } else {

                const bool forward = minPos <= maxPos;
                const int increment = forward ? 1 : -1;

                // Range endpoints should not be inside DBCS characters, but just in case, move them.
                const int startPos = MovePositionOutsideChar(minPos, increment, false);
                const int endPos = MovePositionOutsideChar(maxPos, increment, false);

                // Compute actual search ranges needed
                const int lengthFind = *length;

                //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
                // Upper bound of the range whichever direction is searched; no match may extend past it.
                const int limitPos = Platform::Maximum(startPos, endPos);
                int pos = startPos;
                if (!forward) {
                        // Back all of a character
                        pos = NextPosition(pos, increment);
                }
                if (caseSensitive) {
                        // Exact byte comparison.
                        // Forward searches stop early enough that lengthFind bytes still fit before endPos.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        const char charStartSearch =  search[0];
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                // Only compare the rest when the first byte matches.
                                if (CharAt(pos) == charStartSearch) {
                                        bool found = (pos + lengthFind) <= limitPos;
                                        for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
                                                found = CharAt(pos + indexSearch) == search[indexSearch];
                                        }
                                        if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                                return pos;
                                        }
                                }
                                // Step one character in the search direction; stop when that is not possible.
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else if (SC_CP_UTF8 == dbcsCodePage) {
                        // Case-insensitive UTF-8: fold the search text once, then fold the document
                        // one character at a time and compare against the folded search text.
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        char bytes[UTF8MaxBytes + 1];
                        char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int widthFirstCharacter = 0;
                                int posIndexDocument = pos;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                for (;;) {
                                        // Gather the bytes of the character at posIndexDocument.
                                        const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
                                        bytes[0] = leadByte;
                                        int widthChar = 1;
                                        if (!UTF8IsAscii(leadByte)) {
                                                const int widthCharBytes = UTF8BytesOfLead[leadByte];
                                                for (int b=1; b<widthCharBytes; b++) {
                                                        bytes[b] = cb.CharAt(posIndexDocument+b);
                                                }
                                                // Use the width reported by UTF8Classify rather than the lead byte's nominal width.
                                                widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
                                        }
                                        // Remember how far to advance for the next candidate when searching forward.
                                        if (!widthFirstCharacter)
                                                widthFirstCharacter = widthChar;
                                        // Character would run past the end of the search range.
                                        if ((posIndexDocument + widthChar) > limitPos)
                                                break;
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        if (!characterMatches)
                                                break;
                                        posIndexDocument += widthChar;
                                        indexSearch += lenFlat;
                                        // All of the folded search text has been consumed.
                                        if (indexSearch >= lenSearch)
                                                break;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
                                                // Report the length of the matched document text back to the caller.
                                                *length = posIndexDocument - pos;
                                                return pos;
                                        }
                                }
                                if (forward) {
                                        pos += widthFirstCharacter;
                                } else {
                                        if (!NextCharacter(pos, increment))
                                                break;
                                }
                        }
                } else if (dbcsCodePage) {
                        // Case-insensitive DBCS: characters are 1 or 2 bytes wide as decided by IsDBCSLeadByte.
                        const size_t maxBytesCharacter = 2;
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int indexDocument = 0;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                while (characterMatches &&
                                        ((pos + indexDocument) < limitPos) &&
                                        (indexSearch < lenSearch)) {
                                        char bytes[maxBytesCharacter + 1];
                                        bytes[0] = cb.CharAt(pos + indexDocument);
                                        const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
                                        if (widthChar == 2)
                                                bytes[1] = cb.CharAt(pos + indexDocument + 1);
                                        // Character would run past the end of the search range.
                                        if ((pos + indexDocument + widthChar) > limitPos)
                                                break;
                                        char folded[maxBytesCharacter * maxFoldingExpansion + 1];
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        indexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                // Report the length of the matched document text back to the caller.
                                                *length = indexDocument;
                                                return pos;
                                        }
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else {
                        // Case-insensitive single byte encoding: fold each document byte on its own
                        // and compare it with the corresponding byte of the folded search text.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        std::vector<char> searchThing(lengthFind + 1);
                        pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                bool found = (pos + lengthFind) <= limitPos;
                                for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
                                        char ch = CharAt(pos + indexSearch);
                                        char folded[2];
                                        pcf->Fold(folded, sizeof(folded), &ch, 1);
                                        found = folded[0] == searchThing[indexSearch];
                                }
                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                        return pos;
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                }
        }
        //Platform::DebugPrintf("Not found\n");
        return -1;
}
1688
1689 const char *Document::SubstituteByPosition(const char *text, int *length) {
1690         if (regex)
1691                 return regex->SubstituteByPosition(this, text, length);
1692         else
1693                 return 0;
1694 }
1695
1696 int Document::LinesTotal() const {
1697         return cb.Lines();
1698 }
1699
1700 void Document::SetDefaultCharClasses(bool includeWordClass) {
1701     charClass.SetDefaultCharClasses(includeWordClass);
1702 }
1703
1704 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1705     charClass.SetCharClasses(chars, newCharClass);
1706 }
1707
1708 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1709     return charClass.GetCharsOfClass(characterClass, buffer);
1710 }
1711
1712 void SCI_METHOD Document::StartStyling(int position, char) {
1713         endStyled = position;
1714 }
1715
1716 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1717         if (enteredStyling != 0) {
1718                 return false;
1719         } else {
1720                 enteredStyling++;
1721                 int prevEndStyled = endStyled;
1722                 if (cb.SetStyleFor(endStyled, length, style)) {
1723                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1724                                            prevEndStyled, length);
1725                         NotifyModified(mh);
1726                 }
1727                 endStyled += length;
1728                 enteredStyling--;
1729                 return true;
1730         }
1731 }
1732
1733 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1734         if (enteredStyling != 0) {
1735                 return false;
1736         } else {
1737                 enteredStyling++;
1738                 bool didChange = false;
1739                 int startMod = 0;
1740                 int endMod = 0;
1741                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1742                         PLATFORM_ASSERT(endStyled < Length());
1743                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
1744                                 if (!didChange) {
1745                                         startMod = endStyled;
1746                                 }
1747                                 didChange = true;
1748                                 endMod = endStyled;
1749                         }
1750                 }
1751                 if (didChange) {
1752                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1753                                            startMod, endMod - startMod + 1);
1754                         NotifyModified(mh);
1755                 }
1756                 enteredStyling--;
1757                 return true;
1758         }
1759 }
1760
1761 void Document::EnsureStyledTo(int pos) {
1762         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1763                 IncrementStyleClock();
1764                 if (pli && !pli->UseContainerLexing()) {
1765                         int lineEndStyled = LineFromPosition(GetEndStyled());
1766                         int endStyledTo = LineStart(lineEndStyled);
1767                         pli->Colourise(endStyledTo, pos);
1768                 } else {
1769                         // Ask the watchers to style, and stop as soon as one responds.
1770                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1771                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1772                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1773                         }
1774                 }
1775         }
1776 }
1777
1778 void Document::LexerChanged() {
1779         // Tell the watchers the lexer has changed.
1780         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1781                 it->watcher->NotifyLexerChanged(this, it->userData);
1782         }
1783 }
1784
1785 int SCI_METHOD Document::SetLineState(int line, int state) {
1786         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1787         if (state != statePrevious) {
1788                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1789                 NotifyModified(mh);
1790         }
1791         return statePrevious;
1792 }
1793
1794 int SCI_METHOD Document::GetLineState(int line) const {
1795         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1796 }
1797
1798 int Document::GetMaxLineState() {
1799         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1800 }
1801
1802 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1803         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1804         NotifyModified(mh);
1805 }
1806
1807 StyledText Document::MarginStyledText(int line) const {
1808         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1809         return StyledText(pla->Length(line), pla->Text(line),
1810                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1811 }
1812
1813 void Document::MarginSetText(int line, const char *text) {
1814         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1815         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1816         NotifyModified(mh);
1817 }
1818
1819 void Document::MarginSetStyle(int line, int style) {
1820         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1821         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1822 }
1823
1824 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1825         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1826         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1827 }
1828
1829 void Document::MarginClearAll() {
1830         int maxEditorLine = LinesTotal();
1831         for (int l=0; l<maxEditorLine; l++)
1832                 MarginSetText(l, 0);
1833         // Free remaining data
1834         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1835 }
1836
1837 StyledText Document::AnnotationStyledText(int line) const {
1838         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1839         return StyledText(pla->Length(line), pla->Text(line),
1840                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1841 }
1842
1843 void Document::AnnotationSetText(int line, const char *text) {
1844         if (line >= 0 && line < LinesTotal()) {
1845                 const int linesBefore = AnnotationLines(line);
1846                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1847                 const int linesAfter = AnnotationLines(line);
1848                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1849                 mh.annotationLinesAdded = linesAfter - linesBefore;
1850                 NotifyModified(mh);
1851         }
1852 }
1853
1854 void Document::AnnotationSetStyle(int line, int style) {
1855         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1856         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1857         NotifyModified(mh);
1858 }
1859
1860 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1861         if (line >= 0 && line < LinesTotal()) {
1862                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1863         }
1864 }
1865
1866 int Document::AnnotationLines(int line) const {
1867         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1868 }
1869
1870 void Document::AnnotationClearAll() {
1871         int maxEditorLine = LinesTotal();
1872         for (int l=0; l<maxEditorLine; l++)
1873                 AnnotationSetText(l, 0);
1874         // Free remaining data
1875         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1876 }
1877
1878 void Document::IncrementStyleClock() {
1879         styleClock = (styleClock + 1) % 0x100000;
1880 }
1881
1882 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1883         if (decorations.FillRange(position, value, fillLength)) {
1884                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1885                                                         position, fillLength);
1886                 NotifyModified(mh);
1887         }
1888 }
1889
1890 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1891         WatcherWithUserData wwud(watcher, userData);
1892         std::vector<WatcherWithUserData>::iterator it =
1893                 std::find(watchers.begin(), watchers.end(), wwud);
1894         if (it != watchers.end())
1895                 return false;
1896         watchers.push_back(wwud);
1897         return true;
1898 }
1899
1900 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1901         std::vector<WatcherWithUserData>::iterator it =
1902                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1903         if (it != watchers.end()) {
1904                 watchers.erase(it);
1905                 return true;
1906         }
1907         return false;
1908 }
1909
1910 void Document::NotifyModifyAttempt() {
1911         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1912                 it->watcher->NotifyModifyAttempt(this, it->userData);
1913         }
1914 }
1915
1916 void Document::NotifySavePoint(bool atSavePoint) {
1917         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1918                 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1919         }
1920 }
1921
1922 void Document::NotifyModified(DocModification mh) {
1923         if (mh.modificationType & SC_MOD_INSERTTEXT) {
1924                 decorations.InsertSpace(mh.position, mh.length);
1925         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1926                 decorations.DeleteRange(mh.position, mh.length);
1927         }
1928         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1929                 it->watcher->NotifyModified(this, mh, it->userData);
1930         }
1931 }
1932
1933 bool Document::IsWordPartSeparator(char ch) const {
1934         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1935 }
1936
1937 int Document::WordPartLeft(int pos) {
1938         if (pos > 0) {
1939                 --pos;
1940                 char startChar = cb.CharAt(pos);
1941                 if (IsWordPartSeparator(startChar)) {
1942                         while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1943                                 --pos;
1944                         }
1945                 }
1946                 if (pos > 0) {
1947                         startChar = cb.CharAt(pos);
1948                         --pos;
1949                         if (IsLowerCase(startChar)) {
1950                                 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1951                                         --pos;
1952                                 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1953                                         ++pos;
1954                         } else if (IsUpperCase(startChar)) {
1955                                 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1956                                         --pos;
1957                                 if (!IsUpperCase(cb.CharAt(pos)))
1958                                         ++pos;
1959                         } else if (IsADigit(startChar)) {
1960                                 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1961                                         --pos;
1962                                 if (!IsADigit(cb.CharAt(pos)))
1963                                         ++pos;
1964                         } else if (IsPunctuation(startChar)) {
1965                                 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1966                                         --pos;
1967                                 if (!IsPunctuation(cb.CharAt(pos)))
1968                                         ++pos;
1969                         } else if (isspacechar(startChar)) {
1970                                 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1971                                         --pos;
1972                                 if (!isspacechar(cb.CharAt(pos)))
1973                                         ++pos;
1974                         } else if (!IsASCII(startChar)) {
1975                                 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
1976                                         --pos;
1977                                 if (IsASCII(cb.CharAt(pos)))
1978                                         ++pos;
1979                         } else {
1980                                 ++pos;
1981                         }
1982                 }
1983         }
1984         return pos;
1985 }
1986
1987 int Document::WordPartRight(int pos) {
1988         char startChar = cb.CharAt(pos);
1989         int length = Length();
1990         if (IsWordPartSeparator(startChar)) {
1991                 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1992                         ++pos;
1993                 startChar = cb.CharAt(pos);
1994         }
1995         if (!IsASCII(startChar)) {
1996                 while (pos < length && !IsASCII(cb.CharAt(pos)))
1997                         ++pos;
1998         } else if (IsLowerCase(startChar)) {
1999                 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2000                         ++pos;
2001         } else if (IsUpperCase(startChar)) {
2002                 if (IsLowerCase(cb.CharAt(pos + 1))) {
2003                         ++pos;
2004                         while (pos < length && IsLowerCase(cb.CharAt(pos)))
2005                                 ++pos;
2006                 } else {
2007                         while (pos < length && IsUpperCase(cb.CharAt(pos)))
2008                                 ++pos;
2009                 }
2010                 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2011                         --pos;
2012         } else if (IsADigit(startChar)) {
2013                 while (pos < length && IsADigit(cb.CharAt(pos)))
2014                         ++pos;
2015         } else if (IsPunctuation(startChar)) {
2016                 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2017                         ++pos;
2018         } else if (isspacechar(startChar)) {
2019                 while (pos < length && isspacechar(cb.CharAt(pos)))
2020                         ++pos;
2021         } else {
2022                 ++pos;
2023         }
2024         return pos;
2025 }
2026
/** Report whether @a c is a line feed or a carriage return. */
bool IsLineEndChar(char c) {
        switch (c) {
        case '\n':
        case '\r':
                return true;
        default:
                return false;
        }
}
2030
2031 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2032         int sStart = cb.StyleAt(pos);
2033         if (delta < 0) {
2034                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2035                         pos--;
2036                 pos++;
2037         } else {
2038                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2039                         pos++;
2040         }
2041         return pos;
2042 }
2043
/**
 * Return the brace that pairs with @a ch for the pairs (), [], {} and <>,
 * or '\0' when @a ch is not one of those eight characters.
 */
static char BraceOpposite(char ch) {
        // Adjacent characters form a pair: opener at the even index, closer at the odd one.
        static const char bracePairs[] = "()[]{}<>";
        for (int i = 0; bracePairs[i] != '\0'; i += 2) {
                if (ch == bracePairs[i])
                        return bracePairs[i + 1];
                if (ch == bracePairs[i + 1])
                        return bracePairs[i];
        }
        return '\0';
}
2066
2067 // TODO: should be able to extend styled region to find matching brace
2068 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2069         char chBrace = CharAt(position);
2070         char chSeek = BraceOpposite(chBrace);
2071         if (chSeek == '\0')
2072                 return - 1;
2073         char styBrace = static_cast<char>(StyleAt(position));
2074         int direction = -1;
2075         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2076                 direction = 1;
2077         int depth = 1;
2078         position = NextPosition(position, direction);
2079         while ((position >= 0) && (position < Length())) {
2080                 char chAtPos = CharAt(position);
2081                 char styAtPos = static_cast<char>(StyleAt(position));
2082                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2083                         if (chAtPos == chBrace)
2084                                 depth++;
2085                         if (chAtPos == chSeek)
2086                                 depth--;
2087                         if (depth == 0)
2088                                 return position;
2089                 }
2090                 int positionBeforeMove = position;
2091                 position = NextPosition(position, direction);
2092                 if (position == positionBeforeMove)
2093                         break;
2094         }
2095         return - 1;
2096 }
2097
2098 /**
2099  * Implementation of RegexSearchBase for the default built-in regular expression engine
2100  */
2101 class BuiltinRegex : public RegexSearchBase {
2102 public:
2103         explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2104
2105         virtual ~BuiltinRegex() {
2106         }
2107
2108         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2109                         bool caseSensitive, bool word, bool wordStart, int flags,
2110                         int *length);
2111
2112         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2113
2114 private:
2115         RESearch search;
2116         std::string substituted;
2117 };
2118
2119 // Define a way for the Regular Expression code to access the document
2120 class DocumentIndexer : public CharacterIndexer {
2121         Document *pdoc;
2122         int end;
2123 public:
2124         DocumentIndexer(Document *pdoc_, int end_) :
2125                 pdoc(pdoc_), end(end_) {
2126         }
2127
2128         virtual ~DocumentIndexer() {
2129         }
2130
2131         virtual char CharAt(int index) {
2132                 if (index < 0 || index >= end)
2133                         return 0;
2134                 else
2135                         return pdoc->CharAt(index);
2136         }
2137 };
2138
2139 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2140                         bool caseSensitive, bool, bool, int flags,
2141                         int *length) {
2142         bool posix = (flags & SCFIND_POSIX) != 0;
2143         int increment = (minPos <= maxPos) ? 1 : -1;
2144
2145         int startPos = minPos;
2146         int endPos = maxPos;
2147
2148         // Range endpoints should not be inside DBCS characters, but just in case, move them.
2149         startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2150         endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2151
2152         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2153         if (errmsg) {
2154                 return -1;
2155         }
2156         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2157         // Replace first '.' with '-' in each property file variable reference:
2158         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2159         //     Replace: $(\1-\2)
2160         int lineRangeStart = doc->LineFromPosition(startPos);
2161         int lineRangeEnd = doc->LineFromPosition(endPos);
2162         if ((increment == 1) &&
2163                 (startPos >= doc->LineEnd(lineRangeStart)) &&
2164                 (lineRangeStart < lineRangeEnd)) {
2165                 // the start position is at end of line or between line end characters.
2166                 lineRangeStart++;
2167                 startPos = doc->LineStart(lineRangeStart);
2168         } else if ((increment == -1) &&
2169                    (startPos <= doc->LineStart(lineRangeStart)) &&
2170                    (lineRangeStart > lineRangeEnd)) {
2171                 // the start position is at beginning of line.
2172                 lineRangeStart--;
2173                 startPos = doc->LineEnd(lineRangeStart);
2174         }
2175         int pos = -1;
2176         int lenRet = 0;
2177         char searchEnd = s[*length - 1];
2178         char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2179         int lineRangeBreak = lineRangeEnd + increment;
2180         for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2181                 int startOfLine = doc->LineStart(line);
2182                 int endOfLine = doc->LineEnd(line);
2183                 if (increment == 1) {
2184                         if (line == lineRangeStart) {
2185                                 if ((startPos != startOfLine) && (s[0] == '^'))
2186                                         continue;       // Can't match start of line if start position after start of line
2187                                 startOfLine = startPos;
2188                         }
2189                         if (line == lineRangeEnd) {
2190                                 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2191                                         continue;       // Can't match end of line if end position before end of line
2192                                 endOfLine = endPos;
2193                         }
2194                 } else {
2195                         if (line == lineRangeEnd) {
2196                                 if ((endPos != startOfLine) && (s[0] == '^'))
2197                                         continue;       // Can't match start of line if end position after start of line
2198                                 startOfLine = endPos;
2199                         }
2200                         if (line == lineRangeStart) {
2201                                 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2202                                         continue;       // Can't match end of line if start position before end of line
2203                                 endOfLine = startPos;
2204                         }
2205                 }
2206
2207                 DocumentIndexer di(doc, endOfLine);
2208                 int success = search.Execute(di, startOfLine, endOfLine);
2209                 if (success) {
2210                         pos = search.bopat[0];
2211                         // Ensure only whole characters selected
2212                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2213                         lenRet = search.eopat[0] - search.bopat[0];
2214                         // There can be only one start of a line, so no need to look for last match in line
2215                         if ((increment == -1) && (s[0] != '^')) {
2216                                 // Check for the last match on this line.
2217                                 int repetitions = 1000; // Break out of infinite loop
2218                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2219                                         success = search.Execute(di, pos+1, endOfLine);
2220                                         if (success) {
2221                                                 if (search.eopat[0] <= minPos) {
2222                                                         pos = search.bopat[0];
2223                                                         lenRet = search.eopat[0] - search.bopat[0];
2224                                                 } else {
2225                                                         success = 0;
2226                                                 }
2227                                         }
2228                                 }
2229                         }
2230                         break;
2231                 }
2232         }
2233         *length = lenRet;
2234         return pos;
2235 }
2236
2237 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2238         substituted.clear();
2239         DocumentIndexer di(doc, doc->Length());
2240         search.GrabMatches(di);
2241         for (int j = 0; j < *length; j++) {
2242                 if (text[j] == '\\') {
2243                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2244                                 unsigned int patNum = text[j + 1] - '0';
2245                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2246                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
2247                                         substituted.append(search.pat[patNum].c_str(), len);
2248                                 j++;
2249                         } else {
2250                                 j++;
2251                                 switch (text[j]) {
2252                                 case 'a':
2253                                         substituted.push_back('\a');
2254                                         break;
2255                                 case 'b':
2256                                         substituted.push_back('\b');
2257                                         break;
2258                                 case 'f':
2259                                         substituted.push_back('\f');
2260                                         break;
2261                                 case 'n':
2262                                         substituted.push_back('\n');
2263                                         break;
2264                                 case 'r':
2265                                         substituted.push_back('\r');
2266                                         break;
2267                                 case 't':
2268                                         substituted.push_back('\t');
2269                                         break;
2270                                 case 'v':
2271                                         substituted.push_back('\v');
2272                                         break;
2273                                 case '\\':
2274                                         substituted.push_back('\\');
2275                                         break;
2276                                 default:
2277                                         substituted.push_back('\\');
2278                                         j--;
2279                                 }
2280                         }
2281                 } else {
2282                         substituted.push_back(text[j]);
2283                 }
2284         }
2285         *length = static_cast<int>(substituted.length());
2286         return substituted.c_str();
2287 }
2288
// Factory for the regular expression search object. It is compiled only when
// SCI_OWNREGEX is not defined. The definition is qualified with the Scintilla
// namespace when SCI_NAMESPACE is defined and unqualified otherwise.
#ifndef SCI_OWNREGEX

#ifdef SCI_NAMESPACE

/// Allocate a BuiltinRegex with new, constructed from @a charClassTable, and return it.
RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
        return new BuiltinRegex(charClassTable);
}

#else

/// Allocate a BuiltinRegex with new, constructed from @a charClassTable, and return it.
RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
        return new BuiltinRegex(charClassTable);
}

#endif // SCI_NAMESPACE

#endif // SCI_OWNREGEX