scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <ctype.h>
  12 #include <assert.h>
  13
  14 #include <string>
  15 #include <vector>
  16
  17 #include "Platform.h"
  18
  19 #include "ILexer.h"
  20 #include "Scintilla.h"
  21
  22 #include "SplitVector.h"
  23 #include "Partitioning.h"
  24 #include "RunStyles.h"
  25 #include "CellBuffer.h"
  26 #include "PerLine.h"
  27 #include "CharClassify.h"
  28 #include "CharacterSet.h"
  29 #include "Decoration.h"
  30 #include "Document.h"
  31 #include "RESearch.h"
  32 #include "UniConversion.h"
  33
  34 #ifdef SCI_NAMESPACE
  35 using namespace Scintilla;
  36 #endif
  37
  38 static inline bool IsPunctuation(char ch) {
  39         return isascii(ch) && ispunct(ch);
  40 }
  41
  42 void LexInterface::Colourise(int start, int end) {
  43         if (pdoc && instance && !performingStyle) {
  44                 // Protect against reentrance, which may occur, for example, when
  45                 // fold points are discovered while performing styling and the folding
  46                 // code looks for child lines which may trigger styling.
  47                 performingStyle = true;
  48
  49                 int lengthDoc = pdoc->Length();
  50                 if (end == -1)
  51                         end = lengthDoc;
  52                 int len = end - start;
  53
  54                 PLATFORM_ASSERT(len >= 0);
  55                 PLATFORM_ASSERT(start + len <= lengthDoc);
  56
  57                 int styleStart = 0;
  58                 if (start > 0)
  59                         styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
  60
  61                 if (len > 0) {
  62                         instance->Lex(start, len, styleStart, pdoc);
  63                         instance->Fold(start, len, styleStart, pdoc);
  64                 }
  65
  66                 performingStyle = false;
  67         }
  68 }
  69
  70 Document::Document() {
  71         refCount = 0;
  72         pcf = NULL;
  73 #ifdef _WIN32
  74         eolMode = SC_EOL_CRLF;
  75 #else
  76         eolMode = SC_EOL_LF;
  77 #endif
  78         dbcsCodePage = 0;
  79         stylingBits = 5;
  80         stylingBitsMask = 0x1F;
  81         stylingMask = 0;
  82         endStyled = 0;
  83         styleClock = 0;
  84         enteredModification = 0;
  85         enteredStyling = 0;
  86         enteredReadOnlyCount = 0;
  87         tabInChars = 8;
  88         indentInChars = 0;
  89         actualIndentInChars = 8;
  90         useTabs = true;
  91         tabIndents = true;
  92         backspaceUnindents = false;
  93         watchers = 0;
  94         lenWatchers = 0;
  95
  96         matchesValid = false;
  97         regex = 0;
  98
  99         UTF8BytesOfLeadInitialise();
 100
 101         perLineData[ldMarkers] = new LineMarkers();
 102         perLineData[ldLevels] = new LineLevels();
 103         perLineData[ldState] = new LineState();
 104         perLineData[ldMargin] = new LineAnnotation();
 105         perLineData[ldAnnotation] = new LineAnnotation();
 106
 107         cb.SetPerLine(this);
 108
 109         pli = 0;
 110 }
 111
 112 Document::~Document() {
 113         for (int i = 0; i < lenWatchers; i++) {
 114                 watchers[i].watcher->NotifyDeleted(this, watchers[i].userData);
 115         }
 116         delete []watchers;
 117         for (int j=0; j<ldSize; j++) {
 118                 delete perLineData[j];
 119                 perLineData[j] = 0;
 120         }
 121         watchers = 0;
 122         lenWatchers = 0;
 123         delete regex;
 124         regex = 0;
 125         delete pli;
 126         pli = 0;
 127         delete pcf;
 128         pcf = 0;
 129 }
 130
 131 void Document::Init() {
 132         for (int j=0; j<ldSize; j++) {
 133                 if (perLineData[j])
 134                         perLineData[j]->Init();
 135         }
 136 }
 137
 138 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 139         if (dbcsCodePage != dbcsCodePage_) {
 140                 dbcsCodePage = dbcsCodePage_;
 141                 SetCaseFolder(NULL);
 142                 return true;
 143         } else {
 144                 return false;
 145         }
 146 }
 147
 148 void Document::InsertLine(int line) {
 149         for (int j=0; j<ldSize; j++) {
 150                 if (perLineData[j])
 151                         perLineData[j]->InsertLine(line);
 152         }
 153 }
 154
 155 void Document::RemoveLine(int line) {
 156         for (int j=0; j<ldSize; j++) {
 157                 if (perLineData[j])
 158                         perLineData[j]->RemoveLine(line);
 159         }
 160 }
 161
 162 // Increase reference count and return its previous value.
 163 int Document::AddRef() {
 164         return refCount++;
 165 }
 166
 167 // Decrease reference count and return its previous value.
 168 // Delete the document if reference count reaches zero.
 169 int SCI_METHOD Document::Release() {
 170         int curRefCount = --refCount;
 171         if (curRefCount == 0)
 172                 delete this;
 173         return curRefCount;
 174 }
 175
 176 void Document::SetSavePoint() {
 177         cb.SetSavePoint();
 178         NotifySavePoint(true);
 179 }
 180
 181 int Document::GetMark(int line) {
 182         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 183 }
 184
 185 int Document::MarkerNext(int lineStart, int mask) const {
 186         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 187 }
 188
 189 int Document::AddMark(int line, int markerNum) {
 190         if (line >= 0 && line <= LinesTotal()) {
 191                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 192                         AddMark(line, markerNum, LinesTotal());
 193                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 194                 NotifyModified(mh);
 195                 return prev;
 196         } else {
 197                 return 0;
 198         }
 199 }
 200
 201 void Document::AddMarkSet(int line, int valueSet) {
 202         if (line < 0 || line > LinesTotal()) {
 203                 return;
 204         }
 205         unsigned int m = valueSet;
 206         for (int i = 0; m; i++, m >>= 1)
 207                 if (m & 1)
 208                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 209                                 AddMark(line, i, LinesTotal());
 210         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 211         NotifyModified(mh);
 212 }
 213
 214 void Document::DeleteMark(int line, int markerNum) {
 215         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 216         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 217         NotifyModified(mh);
 218 }
 219
 220 void Document::DeleteMarkFromHandle(int markerHandle) {
 221         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 222         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 223         mh.line = -1;
 224         NotifyModified(mh);
 225 }
 226
 227 void Document::DeleteAllMarks(int markerNum) {
 228         bool someChanges = false;
 229         for (int line = 0; line < LinesTotal(); line++) {
 230                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 231                         someChanges = true;
 232         }
 233         if (someChanges) {
 234                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 235                 mh.line = -1;
 236                 NotifyModified(mh);
 237         }
 238 }
 239
 240 int Document::LineFromHandle(int markerHandle) {
 241         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 242 }
 243
 244 int SCI_METHOD Document::LineStart(int line) const {
 245         return cb.LineStart(line);
 246 }
 247
 248 int Document::LineEnd(int line) const {
 249         if (line == LinesTotal() - 1) {
 250                 return LineStart(line + 1);
 251         } else {
 252                 int position = LineStart(line + 1) - 1;
 253                 // When line terminator is CR+LF, may need to go back one more
 254                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 255                         position--;
 256                 }
 257                 return position;
 258         }
 259 }
 260
 261 void SCI_METHOD Document::SetErrorStatus(int status) {
 262         // Tell the watchers the lexer has changed.
 263         for (int i = 0; i < lenWatchers; i++) {
 264                 watchers[i].watcher->NotifyErrorOccurred(this, watchers[i].userData, status);
 265         }
 266 }
 267
 268 int SCI_METHOD Document::LineFromPosition(int pos) const {
 269         return cb.LineFromPosition(pos);
 270 }
 271
 272 int Document::LineEndPosition(int position) const {
 273         return LineEnd(LineFromPosition(position));
 274 }
 275
 276 bool Document::IsLineEndPosition(int position) const {
 277         return LineEnd(LineFromPosition(position)) == position;
 278 }
 279
 280 int Document::VCHomePosition(int position) const {
 281         int line = LineFromPosition(position);
 282         int startPosition = LineStart(line);
 283         int endLine = LineEnd(line);
 284         int startText = startPosition;
 285         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 286                 startText++;
 287         if (position == startText)
 288                 return startPosition;
 289         else
 290                 return startText;
 291 }
 292
 293 int SCI_METHOD Document::SetLevel(int line, int level) {
 294         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 295         if (prev != level) {
 296                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 297                                    LineStart(line), 0, 0, 0, line);
 298                 mh.foldLevelNow = level;
 299                 mh.foldLevelPrev = prev;
 300                 NotifyModified(mh);
 301         }
 302         return prev;
 303 }
 304
 305 int SCI_METHOD Document::GetLevel(int line) const {
 306         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 307 }
 308
 309 void Document::ClearLevels() {
 310         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 311 }
 312
 313 static bool IsSubordinate(int levelStart, int levelTry) {
 314         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 315                 return true;
 316         else
 317                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 318 }
 319
 320 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 321         if (level == -1)
 322                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 323         int maxLine = LinesTotal();
 324         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 325         int lineMaxSubord = lineParent;
 326         while (lineMaxSubord < maxLine - 1) {
 327                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 328                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 329                         break;
 330                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 331                         break;
 332                 lineMaxSubord++;
 333         }
 334         if (lineMaxSubord > lineParent) {
 335                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 336                         // Have chewed up some whitespace that belongs to a parent so seek back
 337                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 338                                 lineMaxSubord--;
 339                         }
 340                 }
 341         }
 342         return lineMaxSubord;
 343 }
 344
 345 int Document::GetFoldParent(int line) {
 346         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 347         int lineLook = line - 1;
 348         while ((lineLook > 0) && (
 349                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 350                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 351               ) {
 352                 lineLook--;
 353         }
 354         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 355                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 356                 return lineLook;
 357         } else {
 358                 return -1;
 359         }
 360 }
 361
 362 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
 363         int level = GetLevel(line);
 364         int lookLastLine = Platform::Maximum(line, lastLine) + 1;
 365
 366         int lookLine = line;
 367         int lookLineLevel = level;
 368         int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 369         while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
 370                 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
 371                 lookLineLevel = GetLevel(--lookLine);
 372                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 373         }
 374
 375         int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
 376         if (beginFoldBlock == -1) {
 377                 highlightDelimiter.Clear();
 378                 return;
 379         }
 380
 381         int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
 382         int firstChangeableLineBefore = -1;
 383         if (endFoldBlock < line) {
 384                 lookLine = beginFoldBlock - 1;
 385                 lookLineLevel = GetLevel(lookLine);
 386                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 387                 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
 388                         if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
 389                                 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
 390                                         beginFoldBlock = lookLine;
 391                                         endFoldBlock = line;
 392                                         firstChangeableLineBefore = line - 1;
 393                                 }
 394                         }
 395                         if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
 396                                 break;
 397                         lookLineLevel = GetLevel(--lookLine);
 398                         lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 399                 }
 400         }
 401         if (firstChangeableLineBefore == -1) {
 402                 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 403                         lookLine >= beginFoldBlock;
 404                         lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 405                         if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
 406                                 firstChangeableLineBefore = lookLine;
 407                                 break;
 408                         }
 409                 }
 410         }
 411         if (firstChangeableLineBefore == -1)
 412                 firstChangeableLineBefore = beginFoldBlock - 1;
 413
 414         int firstChangeableLineAfter = -1;
 415         for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 416                 lookLine <= endFoldBlock;
 417                 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 418                 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
 419                         firstChangeableLineAfter = lookLine;
 420                         break;
 421                 }
 422         }
 423         if (firstChangeableLineAfter == -1)
 424                 firstChangeableLineAfter = endFoldBlock + 1;
 425
 426         highlightDelimiter.beginFoldBlock = beginFoldBlock;
 427         highlightDelimiter.endFoldBlock = endFoldBlock;
 428         highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
 429         highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
 430 }
 431
 432 int Document::ClampPositionIntoDocument(int pos) {
 433         return Platform::Clamp(pos, 0, Length());
 434 }
 435
 436 bool Document::IsCrLf(int pos) {
 437         if (pos < 0)
 438                 return false;
 439         if (pos >= (Length() - 1))
 440                 return false;
 441         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 442 }
 443
 444 int Document::LenChar(int pos) {
 445         if (pos < 0) {
 446                 return 1;
 447         } else if (IsCrLf(pos)) {
 448                 return 2;
 449         } else if (SC_CP_UTF8 == dbcsCodePage) {
 450                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 451                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 452                 int lengthDoc = Length();
 453                 if ((pos + widthCharBytes) > lengthDoc)
 454                         return lengthDoc - pos;
 455                 else
 456                         return widthCharBytes;
 457         } else if (dbcsCodePage) {
 458                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 459         } else {
 460                 return 1;
 461         }
 462 }
 463
 464 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 465         int trail = pos;
 466         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 467                 trail--;
 468         start = (trail > 0) ? trail-1 : trail;
 469
 470         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 471         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 472         if (widthCharBytes == 1) {
 473                 return false;
 474         } else {
 475                 int trailBytes = widthCharBytes - 1;
 476                 int len = pos - start;
 477                 if (len > trailBytes)
 478                         // pos too far from lead
 479                         return false;
 480                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 481                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 482                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 483                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 484                 if (utf8status & UTF8MaskInvalid)
 485                         return false;
 486                 end = start + widthCharBytes;
 487                 return true;
 488         }
 489 }
 490
 491 // Normalise a position so that it is not halfway through a two byte character.
 492 // This can occur in two situations -
 493 // When lines are terminated with \r\n pairs which should be treated as one character.
 494 // When displaying DBCS text such as Japanese.
 495 // If moving, move the position in the indicated direction.
 496 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
 497         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 498         // If out of range, just return minimum/maximum value.
 499         if (pos <= 0)
 500                 return 0;
 501         if (pos >= Length())
 502                 return Length();
 503
 504         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 505         if (checkLineEnd && IsCrLf(pos - 1)) {
 506                 if (moveDir > 0)
 507                         return pos + 1;
 508                 else
 509                         return pos - 1;
 510         }
 511
 512         if (dbcsCodePage) {
 513                 if (SC_CP_UTF8 == dbcsCodePage) {
 514                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 515                         // If ch is not a trail byte then pos is valid intercharacter position
 516                         if (UTF8IsTrailByte(ch)) {
 517                                 int startUTF = pos;
 518                                 int endUTF = pos;
 519                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 520                                         // ch is a trail byte within a UTF-8 character
 521                                         if (moveDir > 0)
 522                                                 pos = endUTF;
 523                                         else
 524                                                 pos = startUTF;
 525                                 }
 526                                 // Else invalid UTF-8 so return position of isolated trail byte
 527                         }
 528                 } else {
 529                         // Anchor DBCS calculations at start of line because start of line can
 530                         // not be a DBCS trail byte.
 531                         int posStartLine = LineStart(LineFromPosition(pos));
 532                         if (pos == posStartLine)
 533                                 return pos;
 534
 535                         // Step back until a non-lead-byte is found.
 536                         int posCheck = pos;
 537                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 538                                 posCheck--;
 539
 540                         // Check from known start of character.
 541                         while (posCheck < pos) {
 542                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 543                                 if (posCheck + mbsize == pos) {
 544                                         return pos;
 545                                 } else if (posCheck + mbsize > pos) {
 546                                         if (moveDir > 0) {
 547                                                 return posCheck + mbsize;
 548                                         } else {
 549                                                 return posCheck;
 550                                         }
 551                                 }
 552                                 posCheck += mbsize;
 553                         }
 554                 }
 555         }
 556
 557         return pos;
 558 }
 559
 560 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 561 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 562 // A \r\n pair is treated as two characters.
 563 int Document::NextPosition(int pos, int moveDir) const {
 564         // If out of range, just return minimum/maximum value.
 565         int increment = (moveDir > 0) ? 1 : -1;
 566         if (pos + increment <= 0)
 567                 return 0;
 568         if (pos + increment >= Length())
 569                 return Length();
 570
 571         if (dbcsCodePage) {
 572                 if (SC_CP_UTF8 == dbcsCodePage) {
 573                         if (increment == 1) {
 574                                 // Simple forward movement case so can avoid some checks
 575                                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 576                                 if (UTF8IsAscii(leadByte)) {
 577                                         // Single byte character or invalid
 578                                         pos++;
 579                                 } else {
 580                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 581                                         char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 582                                         for (int b=1; b<widthCharBytes; b++)
 583                                                 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
 584                                         int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 585                                         if (utf8status & UTF8MaskInvalid)
 586                                                 pos++;
 587                                         else
 588                                                 pos += utf8status & UTF8MaskWidth;
 589                                 }
 590                         } else {
 591                                 // Examine byte before position
 592                                 pos--;
 593                                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 594                                 // If ch is not a trail byte then pos is valid intercharacter position
 595                                 if (UTF8IsTrailByte(ch)) {
 596                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 597                                         int startUTF = pos;
 598                                         int endUTF = pos;
 599                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 600                                                 pos = startUTF;
 601                                         }
 602                                         // Else invalid UTF-8 so return position of isolated trail byte
 603                                 }
 604                         }
 605                 } else {
 606                         if (moveDir > 0) {
 607                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 608                                 pos += mbsize;
 609                                 if (pos > Length())
 610                                         pos = Length();
 611                         } else {
 612                                 // Anchor DBCS calculations at start of line because start of line can
 613                                 // not be a DBCS trail byte.
 614                                 int posStartLine = LineStart(LineFromPosition(pos));
 615                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 616                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 617                                 if ((pos - 1) <= posStartLine) {
 618                                         return pos - 1;
 619                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 620                                         // Must actually be trail byte
 621                                         return pos - 2;
 622                                 } else {
 623                                         // Otherwise, step back until a non-lead-byte is found.
 624                                         int posTemp = pos - 1;
 625                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 626                                                 ;
 627                                         // Now posTemp+1 must point to the beginning of a character,
 628                                         // so figure out whether we went back an even or an odd
 629                                         // number of bytes and go back 1 or 2 bytes, respectively.
 630                                         return (pos - 1 - ((pos - posTemp) & 1));
 631                                 }
 632                         }
 633                 }
 634         } else {
 635                 pos += increment;
 636         }
 637
 638         return pos;
 639 }
 640
 641 bool Document::NextCharacter(int &pos, int moveDir) {
 642         // Returns true if pos changed
 643         int posNext = NextPosition(pos, moveDir);
 644         if (posNext == pos) {
 645                 return false;
 646         } else {
 647                 pos = posNext;
 648                 return true;
 649         }
 650 }
 651
 652 int SCI_METHOD Document::CodePage() const {
 653         return dbcsCodePage;
 654 }
 655
 656 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 657         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 658         unsigned char uch = static_cast<unsigned char>(ch);
 659         switch (dbcsCodePage) {
 660                 case 932:
 661                         // Shift_jis
 662                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 663                                 ((uch >= 0xE0) && (uch <= 0xFC));
 664                                 // Lead bytes F0 to FC may be a Microsoft addition.
 665                 case 936:
 666                         // GBK
 667                         return (uch >= 0x81) && (uch <= 0xFE);
 668                 case 949:
 669                         // Korean Wansung KS C-5601-1987
 670                         return (uch >= 0x81) && (uch <= 0xFE);
 671                 case 950:
 672                         // Big5
 673                         return (uch >= 0x81) && (uch <= 0xFE);
 674                 case 1361:
 675                         // Korean Johab KS C-5601-1992
 676                         return
 677                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 678                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 679                                 ((uch >= 0xE0) && (uch <= 0xF9));
 680         }
 681         return false;
 682 }
 683
 684 static inline bool IsSpaceOrTab(int ch) {
 685         return ch == ' ' || ch == '\t';
 686 }
 687
 688 // Need to break text into segments near lengthSegment but taking into
 689 // account the encoding to not break inside a UTF-8 or DBCS character
 690 // and also trying to avoid breaking inside a pair of combining characters.
 691 // The segment length must always be long enough (more than 4 bytes)
 692 // so that there will be at least one whole character to make a segment.
 693 // For UTF-8, text must consist only of valid whole characters.
 694 // In preference order from best to worst:
 695 //   1) Break after space
 696 //   2) Break before punctuation
 697 //   3) Break after whole character
 698
 699 int Document::SafeSegment(const char *text, int length, int lengthSegment) {
 700         if (length <= lengthSegment)
 701                 return length;
 702         int lastSpaceBreak = -1;
 703         int lastPunctuationBreak = -1;
 704         int lastEncodingAllowedBreak = -1;
 705         for (int j=0; j < lengthSegment;) {
 706                 unsigned char ch = static_cast<unsigned char>(text[j]);
 707                 if (j > 0) {
 708                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 709                                 lastSpaceBreak = j;
 710                         }
 711                         if (ch < 'A') {
 712                                 lastPunctuationBreak = j;
 713                         }
 714                 }
 715                 lastEncodingAllowedBreak = j;
 716
 717                 if (dbcsCodePage == SC_CP_UTF8) {
 718                         j += UTF8BytesOfLead[ch];
 719                 } else if (dbcsCodePage) {
 720                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 721                 } else {
 722                         j++;
 723                 }
 724         }
 725         if (lastSpaceBreak >= 0) {
 726                 return lastSpaceBreak;
 727         } else if (lastPunctuationBreak >= 0) {
 728                 return lastPunctuationBreak;
 729         }
 730         return lastEncodingAllowedBreak;
 731 }
 732
 733 void Document::ModifiedAt(int pos) {
 734         if (endStyled > pos)
 735                 endStyled = pos;
 736 }
 737
 738 void Document::CheckReadOnly() {
 739         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 740                 enteredReadOnlyCount++;
 741                 NotifyModifyAttempt();
 742                 enteredReadOnlyCount--;
 743         }
 744 }
 745
// The document is only modified through the gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
// SetStyleAt does not change the persistent state of a document.
 748
 749 bool Document::DeleteChars(int pos, int len) {
 750         if (len <= 0)
 751                 return false;
 752         if ((pos + len) > Length())
 753                 return false;
 754         CheckReadOnly();
 755         if (enteredModification != 0) {
 756                 return false;
 757         } else {
 758                 enteredModification++;
 759                 if (!cb.IsReadOnly()) {
 760                         NotifyModified(
 761                             DocModification(
 762                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 763                                 pos, len,
 764                                 0, 0));
 765                         int prevLinesTotal = LinesTotal();
 766                         bool startSavePoint = cb.IsSavePoint();
 767                         bool startSequence = false;
 768                         const char *text = cb.DeleteChars(pos, len, startSequence);
 769                         if (startSavePoint && cb.IsCollectingUndo())
 770                                 NotifySavePoint(!startSavePoint);
 771                         if ((pos < Length()) || (pos == 0))
 772                                 ModifiedAt(pos);
 773                         else
 774                                 ModifiedAt(pos-1);
 775                         NotifyModified(
 776                             DocModification(
 777                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 778                                 pos, len,
 779                                 LinesTotal() - prevLinesTotal, text));
 780                 }
 781                 enteredModification--;
 782         }
 783         return !cb.IsReadOnly();
 784 }
 785
 786 /**
 787  * Insert a string with a length.
 788  */
 789 bool Document::InsertString(int position, const char *s, int insertLength) {
 790         if (insertLength <= 0) {
 791                 return false;
 792         }
 793         CheckReadOnly();
 794         if (enteredModification != 0) {
 795                 return false;
 796         } else {
 797                 enteredModification++;
 798                 if (!cb.IsReadOnly()) {
 799                         NotifyModified(
 800                             DocModification(
 801                                 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
 802                                 position, insertLength,
 803                                 0, s));
 804                         int prevLinesTotal = LinesTotal();
 805                         bool startSavePoint = cb.IsSavePoint();
 806                         bool startSequence = false;
 807                         const char *text = cb.InsertString(position, s, insertLength, startSequence);
 808                         if (startSavePoint && cb.IsCollectingUndo())
 809                                 NotifySavePoint(!startSavePoint);
 810                         ModifiedAt(position);
 811                         NotifyModified(
 812                             DocModification(
 813                                 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 814                                 position, insertLength,
 815                                 LinesTotal() - prevLinesTotal, text));
 816                 }
 817                 enteredModification--;
 818         }
 819         return !cb.IsReadOnly();
 820 }
 821
 822 int SCI_METHOD Document::AddData(char *data, int length) {
 823         try {
 824                 int position = Length();
 825                 InsertString(position,data, length);
 826         } catch (std::bad_alloc &) {
 827                 return SC_STATUS_BADALLOC;
 828         } catch (...) {
 829                 return SC_STATUS_FAILURE;
 830         }
 831         return 0;
 832 }
 833
 834 void * SCI_METHOD Document::ConvertToDocument() {
 835         return this;
 836 }
 837
 838 int Document::Undo() {
 839         int newPos = -1;
 840         CheckReadOnly();
 841         if (enteredModification == 0) {
 842                 enteredModification++;
 843                 if (!cb.IsReadOnly()) {
 844                         bool startSavePoint = cb.IsSavePoint();
 845                         bool multiLine = false;
 846                         int steps = cb.StartUndo();
 847                         //Platform::DebugPrintf("Steps=%d\n", steps);
 848                         int coalescedRemovePos = -1;
 849                         int coalescedRemoveLen = 0;
 850                         int prevRemoveActionPos = -1;
 851                         int prevRemoveActionLen = 0;
 852                         for (int step = 0; step < steps; step++) {
 853                                 const int prevLinesTotal = LinesTotal();
 854                                 const Action &action = cb.GetUndoStep();
 855                                 if (action.at == removeAction) {
 856                                         NotifyModified(DocModification(
 857                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
 858                                 } else if (action.at == containerAction) {
 859                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
 860                                         dm.token = action.position;
 861                                         NotifyModified(dm);
 862                                         if (!action.mayCoalesce) {
 863                                                 coalescedRemovePos = -1;
 864                                                 coalescedRemoveLen = 0;
 865                                                 prevRemoveActionPos = -1;
 866                                                 prevRemoveActionLen = 0;
 867                                         }
 868                                 } else {
 869                                         NotifyModified(DocModification(
 870                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
 871                                 }
 872                                 cb.PerformUndoStep();
 873                                 if (action.at != containerAction) {
 874                                         ModifiedAt(action.position);
 875                                         newPos = action.position;
 876                                 }
 877
 878                                 int modFlags = SC_PERFORMED_UNDO;
 879                                 // With undo, an insertion action becomes a deletion notification
 880                                 if (action.at == removeAction) {
 881                                         newPos += action.lenData;
 882                                         modFlags |= SC_MOD_INSERTTEXT;
 883                                         if ((coalescedRemoveLen > 0) &&
 884                                                 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
 885                                                 coalescedRemoveLen += action.lenData;
 886                                                 newPos = coalescedRemovePos + coalescedRemoveLen;
 887                                         } else {
 888                                                 coalescedRemovePos = action.position;
 889                                                 coalescedRemoveLen = action.lenData;
 890                                         }
 891                                         prevRemoveActionPos = action.position;
 892                                         prevRemoveActionLen = action.lenData;
 893                                 } else if (action.at == insertAction) {
 894                                         modFlags |= SC_MOD_DELETETEXT;
 895                                         coalescedRemovePos = -1;
 896                                         coalescedRemoveLen = 0;
 897                                         prevRemoveActionPos = -1;
 898                                         prevRemoveActionLen = 0;
 899                                 }
 900                                 if (steps > 1)
 901                                         modFlags |= SC_MULTISTEPUNDOREDO;
 902                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 903                                 if (linesAdded != 0)
 904                                         multiLine = true;
 905                                 if (step == steps - 1) {
 906                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 907                                         if (multiLine)
 908                                                 modFlags |= SC_MULTILINEUNDOREDO;
 909                                 }
 910                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
 911                                                                                            linesAdded, action.data));
 912                         }
 913
 914                         bool endSavePoint = cb.IsSavePoint();
 915                         if (startSavePoint != endSavePoint)
 916                                 NotifySavePoint(endSavePoint);
 917                 }
 918                 enteredModification--;
 919         }
 920         return newPos;
 921 }
 922
 923 int Document::Redo() {
 924         int newPos = -1;
 925         CheckReadOnly();
 926         if (enteredModification == 0) {
 927                 enteredModification++;
 928                 if (!cb.IsReadOnly()) {
 929                         bool startSavePoint = cb.IsSavePoint();
 930                         bool multiLine = false;
 931                         int steps = cb.StartRedo();
 932                         for (int step = 0; step < steps; step++) {
 933                                 const int prevLinesTotal = LinesTotal();
 934                                 const Action &action = cb.GetRedoStep();
 935                                 if (action.at == insertAction) {
 936                                         NotifyModified(DocModification(
 937                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
 938                                 } else if (action.at == containerAction) {
 939                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
 940                                         dm.token = action.position;
 941                                         NotifyModified(dm);
 942                                 } else {
 943                                         NotifyModified(DocModification(
 944                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
 945                                 }
 946                                 cb.PerformRedoStep();
 947                                 if (action.at != containerAction) {
 948                                         ModifiedAt(action.position);
 949                                         newPos = action.position;
 950                                 }
 951
 952                                 int modFlags = SC_PERFORMED_REDO;
 953                                 if (action.at == insertAction) {
 954                                         newPos += action.lenData;
 955                                         modFlags |= SC_MOD_INSERTTEXT;
 956                                 } else if (action.at == removeAction) {
 957                                         modFlags |= SC_MOD_DELETETEXT;
 958                                 }
 959                                 if (steps > 1)
 960                                         modFlags |= SC_MULTISTEPUNDOREDO;
 961                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 962                                 if (linesAdded != 0)
 963                                         multiLine = true;
 964                                 if (step == steps - 1) {
 965                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 966                                         if (multiLine)
 967                                                 modFlags |= SC_MULTILINEUNDOREDO;
 968                                 }
 969                                 NotifyModified(
 970                                         DocModification(modFlags, action.position, action.lenData,
 971                                                                         linesAdded, action.data));
 972                         }
 973
 974                         bool endSavePoint = cb.IsSavePoint();
 975                         if (startSavePoint != endSavePoint)
 976                                 NotifySavePoint(endSavePoint);
 977                 }
 978                 enteredModification--;
 979         }
 980         return newPos;
 981 }
 982
 983 /**
 984  * Insert a single character.
 985  */
 986 bool Document::InsertChar(int pos, char ch) {
 987         char chs[1];
 988         chs[0] = ch;
 989         return InsertString(pos, chs, 1);
 990 }
 991
 992 /**
 993  * Insert a null terminated string.
 994  */
 995 bool Document::InsertCString(int position, const char *s) {
 996         return InsertString(position, s, static_cast<int>(s ? strlen(s) : 0));
 997 }
 998
 999 void Document::ChangeChar(int pos, char ch) {
1000         DeleteChars(pos, 1);
1001         InsertChar(pos, ch);
1002 }
1003
1004 void Document::DelChar(int pos) {
1005         DeleteChars(pos, LenChar(pos));
1006 }
1007
1008 void Document::DelCharBack(int pos) {
1009         if (pos <= 0) {
1010                 return;
1011         } else if (IsCrLf(pos - 2)) {
1012                 DeleteChars(pos - 2, 2);
1013         } else if (dbcsCodePage) {
1014                 int startChar = NextPosition(pos, -1);
1015                 DeleteChars(startChar, pos - startChar);
1016         } else {
1017                 DeleteChars(pos - 1, 1);
1018         }
1019 }
1020
// Column of the first tab stop strictly after pos, with stops every tabSize columns.
static int NextTab(int pos, int tabSize) {
	const int tabsPassed = pos / tabSize;
	return (tabsPassed + 1) * tabSize;
}
1024
// Build the white space that produces an indentation of indent columns.
// Unless insertSpaces is set, one tab is used for every whole tabSize columns;
// the remaining columns are made up with spaces.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	std::string indentation;
	if (!insertSpaces) {
		for (; indent >= tabSize; indent -= tabSize) {
			indentation += '\t';
		}
	}
	if (indent > 0) {
		indentation.append(static_cast<size_t>(indent), ' ');
	}
	return indentation;
}
1039
1040 int SCI_METHOD Document::GetLineIndentation(int line) {
1041         int indent = 0;
1042         if ((line >= 0) && (line < LinesTotal())) {
1043                 int lineStart = LineStart(line);
1044                 int length = Length();
1045                 for (int i = lineStart; i < length; i++) {
1046                         char ch = cb.CharAt(i);
1047                         if (ch == ' ')
1048                                 indent++;
1049                         else if (ch == '\t')
1050                                 indent = NextTab(indent, tabInChars);
1051                         else
1052                                 return indent;
1053                 }
1054         }
1055         return indent;
1056 }
1057
1058 void Document::SetLineIndentation(int line, int indent) {
1059         int indentOfLine = GetLineIndentation(line);
1060         if (indent < 0)
1061                 indent = 0;
1062         if (indent != indentOfLine) {
1063                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1064                 int thisLineStart = LineStart(line);
1065                 int indentPos = GetLineIndentPosition(line);
1066                 UndoGroup ug(this);
1067                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1068                 InsertCString(thisLineStart, linebuf.c_str());
1069         }
1070 }
1071
1072 int Document::GetLineIndentPosition(int line) const {
1073         if (line < 0)
1074                 return 0;
1075         int pos = LineStart(line);
1076         int length = Length();
1077         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1078                 pos++;
1079         }
1080         return pos;
1081 }
1082
1083 int Document::GetColumn(int pos) {
1084         int column = 0;
1085         int line = LineFromPosition(pos);
1086         if ((line >= 0) && (line < LinesTotal())) {
1087                 for (int i = LineStart(line); i < pos;) {
1088                         char ch = cb.CharAt(i);
1089                         if (ch == '\t') {
1090                                 column = NextTab(column, tabInChars);
1091                                 i++;
1092                         } else if (ch == '\r') {
1093                                 return column;
1094                         } else if (ch == '\n') {
1095                                 return column;
1096                         } else if (i >= Length()) {
1097                                 return column;
1098                         } else {
1099                                 column++;
1100                                 i = NextPosition(i, 1);
1101                         }
1102                 }
1103         }
1104         return column;
1105 }
1106
1107 int Document::CountCharacters(int startPos, int endPos) {
1108         startPos = MovePositionOutsideChar(startPos, 1, false);
1109         endPos = MovePositionOutsideChar(endPos, -1, false);
1110         int count = 0;
1111         int i = startPos;
1112         while (i < endPos) {
1113                 count++;
1114                 if (IsCrLf(i))
1115                         i++;
1116                 i = NextPosition(i, 1);
1117         }
1118         return count;
1119 }
1120
1121 int Document::FindColumn(int line, int column) {
1122         int position = LineStart(line);
1123         if ((line >= 0) && (line < LinesTotal())) {
1124                 int columnCurrent = 0;
1125                 while ((columnCurrent < column) && (position < Length())) {
1126                         char ch = cb.CharAt(position);
1127                         if (ch == '\t') {
1128                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1129                                 if (columnCurrent > column)
1130                                         return position;
1131                                 position++;
1132                         } else if (ch == '\r') {
1133                                 return position;
1134                         } else if (ch == '\n') {
1135                                 return position;
1136                         } else {
1137                                 columnCurrent++;
1138                                 position = NextPosition(position, 1);
1139                         }
1140                 }
1141         }
1142         return position;
1143 }
1144
1145 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1146         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1147         for (int line = lineBottom; line >= lineTop; line--) {
1148                 int indentOfLine = GetLineIndentation(line);
1149                 if (forwards) {
1150                         if (LineStart(line) < LineEnd(line)) {
1151                                 SetLineIndentation(line, indentOfLine + IndentSize());
1152                         }
1153                 } else {
1154                         SetLineIndentation(line, indentOfLine - IndentSize());
1155                 }
1156         }
1157 }
1158
1159 // Convert line endings for a piece of text to a particular mode.
1160 // Stop at len or when a NUL is found.
1161 // Caller must delete the returned pointer.
1162 char *Document::TransformLineEnds(int *pLenOut, const char *s, size_t len, int eolModeWanted) {
1163         char *dest = new char[2 * len + 1];
1164         const char *sptr = s;
1165         char *dptr = dest;
1166         for (size_t i = 0; (i < len) && (*sptr != '\0'); i++) {
1167                 if (*sptr == '\n' || *sptr == '\r') {
1168                         if (eolModeWanted == SC_EOL_CR) {
1169                                 *dptr++ = '\r';
1170                         } else if (eolModeWanted == SC_EOL_LF) {
1171                                 *dptr++ = '\n';
1172                         } else { // eolModeWanted == SC_EOL_CRLF
1173                                 *dptr++ = '\r';
1174                                 *dptr++ = '\n';
1175                         }
1176                         if ((*sptr == '\r') && (i+1 < len) && (*(sptr+1) == '\n')) {
1177                                 i++;
1178                                 sptr++;
1179                         }
1180                         sptr++;
1181                 } else {
1182                         *dptr++ = *sptr++;
1183                 }
1184         }
1185         *dptr++ = '\0';
1186         *pLenOut = (dptr - dest) - 1;
1187         return dest;
1188 }
1189
1190 void Document::ConvertLineEnds(int eolModeSet) {
1191         UndoGroup ug(this);
1192
1193         for (int pos = 0; pos < Length(); pos++) {
1194                 if (cb.CharAt(pos) == '\r') {
1195                         if (cb.CharAt(pos + 1) == '\n') {
1196                                 // CRLF
1197                                 if (eolModeSet == SC_EOL_CR) {
1198                                         DeleteChars(pos + 1, 1); // Delete the LF
1199                                 } else if (eolModeSet == SC_EOL_LF) {
1200                                         DeleteChars(pos, 1); // Delete the CR
1201                                 } else {
1202                                         pos++;
1203                                 }
1204                         } else {
1205                                 // CR
1206                                 if (eolModeSet == SC_EOL_CRLF) {
1207                                         InsertString(pos + 1, "\n", 1); // Insert LF
1208                                         pos++;
1209                                 } else if (eolModeSet == SC_EOL_LF) {
1210                                         InsertString(pos, "\n", 1); // Insert LF
1211                                         DeleteChars(pos + 1, 1); // Delete CR
1212                                 }
1213                         }
1214                 } else if (cb.CharAt(pos) == '\n') {
1215                         // LF
1216                         if (eolModeSet == SC_EOL_CRLF) {
1217                                 InsertString(pos, "\r", 1); // Insert CR
1218                                 pos++;
1219                         } else if (eolModeSet == SC_EOL_CR) {
1220                                 InsertString(pos, "\r", 1); // Insert CR
1221                                 DeleteChars(pos + 1, 1); // Delete LF
1222                         }
1223                 }
1224         }
1225
1226 }
1227
1228 bool Document::IsWhiteLine(int line) const {
1229         int currentChar = LineStart(line);
1230         int endLine = LineEnd(line);
1231         while (currentChar < endLine) {
1232                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1233                         return false;
1234                 }
1235                 ++currentChar;
1236         }
1237         return true;
1238 }
1239
1240 int Document::ParaUp(int pos) {
1241         int line = LineFromPosition(pos);
1242         line--;
1243         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1244                 line--;
1245         }
1246         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1247                 line--;
1248         }
1249         line++;
1250         return LineStart(line);
1251 }
1252
1253 int Document::ParaDown(int pos) {
1254         int line = LineFromPosition(pos);
1255         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1256                 line++;
1257         }
1258         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1259                 line++;
1260         }
1261         if (line < LinesTotal())
1262                 return LineStart(line);
1263         else // end of a document
1264                 return LineEnd(line-1);
1265 }
1266
1267 CharClassify::cc Document::WordCharClass(unsigned char ch) {
1268         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1269                 return CharClassify::ccWord;
1270         return charClass.GetClass(ch);
1271 }
1272
1273 /**
1274  * Used by commmands that want to select whole words.
1275  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1276  */
1277 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1278         CharClassify::cc ccStart = CharClassify::ccWord;
1279         if (delta < 0) {
1280                 if (!onlyWordCharacters)
1281                         ccStart = WordCharClass(cb.CharAt(pos-1));
1282                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1283                         pos--;
1284         } else {
1285                 if (!onlyWordCharacters && pos < Length())
1286                         ccStart = WordCharClass(cb.CharAt(pos));
1287                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1288                         pos++;
1289         }
1290         return MovePositionOutsideChar(pos, delta, true);
1291 }
1292
1293 /**
1294  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1295  * (delta < 0).
1296  * This is looking for a transition between character classes although there is also some
1297  * additional movement to transit white space.
1298  * Used by cursor movement by word commands.
1299  */
1300 int Document::NextWordStart(int pos, int delta) {
1301         if (delta < 0) {
1302                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1303                         pos--;
1304                 if (pos > 0) {
1305                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1306                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1307                                 pos--;
1308                         }
1309                 }
1310         } else {
1311                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1312                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1313                         pos++;
1314                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1315                         pos++;
1316         }
1317         return pos;
1318 }
1319
1320 /**
1321  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1322  * (delta < 0).
1323  * This is looking for a transition between character classes although there is also some
1324  * additional movement to transit white space.
1325  * Used by cursor movement by word commands.
1326  */
1327 int Document::NextWordEnd(int pos, int delta) {
1328         if (delta < 0) {
1329                 if (pos > 0) {
1330                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1331                         if (ccStart != CharClassify::ccSpace) {
1332                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1333                                         pos--;
1334                                 }
1335                         }
1336                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1337                                 pos--;
1338                         }
1339                 }
1340         } else {
1341                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1342                         pos++;
1343                 }
1344                 if (pos < Length()) {
1345                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1346                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1347                                 pos++;
1348                         }
1349                 }
1350         }
1351         return pos;
1352 }
1353
1354 /**
1355  * Check that the character at the given position is a word or punctuation character and that
1356  * the previous character is of a different character class.
1357  */
1358 bool Document::IsWordStartAt(int pos) {
1359         if (pos > 0) {
1360                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1361                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1362                         (ccPos != WordCharClass(CharAt(pos - 1)));
1363         }
1364         return true;
1365 }
1366
1367 /**
1368  * Check that the character at the given position is a word or punctuation character and that
1369  * the next character is of a different character class.
1370  */
1371 bool Document::IsWordEndAt(int pos) {
1372         if (pos < Length()) {
1373                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1374                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1375                         (ccPrev != WordCharClass(CharAt(pos)));
1376         }
1377         return true;
1378 }
1379
1380 /**
1381  * Check that the given range is has transitions between character classes at both
1382  * ends and where the characters on the inside are word or punctuation characters.
1383  */
1384 bool Document::IsWordAt(int start, int end) {
1385         return IsWordStartAt(start) && IsWordEndAt(end);
1386 }
1387
// Map 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static inline char MakeLowerCase(char ch) {
	const bool isUpper = (ch >= 'A') && (ch <= 'Z');
	return isUpper ? static_cast<char>(ch - 'A' + 'a') : ch;
}
1394
1395 CaseFolderTable::CaseFolderTable() {
1396         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1397                 mapping[iChar] = static_cast<char>(iChar);
1398         }
1399 }
1400
1401 CaseFolderTable::~CaseFolderTable() {
1402 }
1403
1404 size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
1405         if (lenMixed > sizeFolded) {
1406                 return 0;
1407         } else {
1408                 for (size_t i=0; i<lenMixed; i++) {
1409                         folded[i] = mapping[static_cast<unsigned char>(mixed[i])];
1410                 }
1411                 return lenMixed;
1412         }
1413 }
1414
1415 void CaseFolderTable::SetTranslation(char ch, char chTranslation) {
1416         mapping[static_cast<unsigned char>(ch)] = chTranslation;
1417 }
1418
1419 void CaseFolderTable::StandardASCII() {
1420         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1421                 if (iChar >= 'A' && iChar <= 'Z') {
1422                         mapping[iChar] = static_cast<char>(iChar - 'A' + 'a');
1423                 } else {
1424                         mapping[iChar] = static_cast<char>(iChar);
1425                 }
1426         }
1427 }
1428
1429 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) {
1430         return (!word && !wordStart) ||
1431                         (word && IsWordAt(pos, pos + length)) ||
1432                         (wordStart && IsWordStartAt(pos));
1433 }
1434
1435 bool Document::HasCaseFolder(void) const {
1436         return pcf != 0;
1437 }
1438
1439 void Document::SetCaseFolder(CaseFolder *pcf_) {
1440         delete pcf;
1441         pcf = pcf_;
1442 }
1443
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos        Position at which the search starts.
 * @param maxPos        Position at which the search stops; the search runs forward
 *                      when minPos <= maxPos and backward otherwise.
 * @param search        Text (or regular expression when @a regExp is set) to look for.
 * @param caseSensitive When false, document and search text are compared after being
 *                      passed through the case folder @c pcf.
 * @param word          Only accept matches for which IsWordAt holds.
 * @param wordStart     Only accept matches for which IsWordStartAt holds.
 * @param regExp        Delegate the whole search to the regular expression engine.
 * @param flags         Passed through to the regular expression engine only.
 * @param length        In: number of bytes of @a search to use. Out: in the case
 *                      insensitive UTF-8 and DBCS branches it is set to the length of
 *                      the matched document text; in the regular expression branch it is
 *                      handed to the engine; the other branches leave it untouched.
 * @return Position of the match, -1 when nothing was found, or @a minPos when
 *         *length is not positive.
 */
long Document::FindText(int minPos, int maxPos, const char *search,
			bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
			int *length) {
	if (*length <= 0)
		return minPos;
	if (regExp) {
		// The regular expression engine is created on first use.
		if (!regex)
			regex = CreateRegexSearch(&charClass);
		return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
	} else {

		const bool forward = minPos <= maxPos;
		const int increment = forward ? 1 : -1;

		// Range endpoints should not be inside DBCS characters, but just in case, move them.
		const int startPos = MovePositionOutsideChar(minPos, increment, false);
		const int endPos = MovePositionOutsideChar(maxPos, increment, false);

		// Compute actual search ranges needed
		const int lengthFind = *length;

		//Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
		// Upper bound of the range whichever direction is searched; a match may not extend past it.
		const int limitPos = Platform::Maximum(startPos, endPos);
		int pos = startPos;
		if (!forward) {
			// Back all of a character
			pos = NextPosition(pos, increment);
		}
		// NOTE(review): the three case insensitive branches below dereference pcf without
		// checking it; presumably callers install a folder with SetCaseFolder first - confirm.
		if (caseSensitive) {
			// Byte-wise comparison. When searching forward, stop early enough that the
			// whole search text still fits before endPos.
			const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			const char charStartSearch =  search[0];
			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
				// Only compare the rest of the text when the first byte matches.
				if (CharAt(pos) == charStartSearch) {
					bool found = (pos + lengthFind) <= limitPos;
					for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
						found = CharAt(pos + indexSearch) == search[indexSearch];
					}
					if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
						return pos;
					}
				}
				if (!NextCharacter(pos, increment))
					break;
			}
		} else if (SC_CP_UTF8 == dbcsCodePage) {
			// Case insensitive UTF-8: fold the search text once, then fold the document one
			// character at a time and compare the folded bytes.
			// Buffers allow each byte of input to grow by this factor when folded.
			const size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
			const int lenSearch = static_cast<int>(
				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
			char bytes[UTF8MaxBytes + 1];
			char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int widthFirstCharacter = 0;
				int posIndexDocument = pos;
				int indexSearch = 0;
				bool characterMatches = true;
				// Walk document characters from pos until a mismatch, the range limit,
				// or the whole folded search text has been consumed.
				for (;;) {
					const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
					bytes[0] = leadByte;
					int widthChar = 1;
					if (!UTF8IsAscii(leadByte)) {
						// Gather the bytes the lead byte announces, then take the width
						// reported by UTF8Classify (masked with UTF8MaskWidth).
						const int widthCharBytes = UTF8BytesOfLead[leadByte];
						for (int b=1; b<widthCharBytes; b++) {
							bytes[b] = cb.CharAt(posIndexDocument+b);
						}
						widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
					}
					// Remember the first character's width: it is the forward step size below.
					if (!widthFirstCharacter)
						widthFirstCharacter = widthChar;
					if ((posIndexDocument + widthChar) > limitPos)
						break;
					const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
					folded[lenFlat] = 0;
					// Does folded match the buffer
					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					if (!characterMatches)
						break;
					posIndexDocument += widthChar;
					indexSearch += lenFlat;
					if (indexSearch >= lenSearch)
						break;
				}
				// A match requires the folded search text to have been consumed exactly.
				if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
					if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
						// Report the length of the document text matched, which is measured
						// in document bytes rather than search text bytes.
						*length = posIndexDocument - pos;
						return pos;
					}
				}
				if (forward) {
					pos += widthFirstCharacter;
				} else {
					if (!NextCharacter(pos, increment))
						break;
				}
			}
		} else if (dbcsCodePage) {
			// Case insensitive DBCS: same approach as UTF-8 with characters of 1 or 2 bytes.
			const size_t maxBytesCharacter = 2;
			const size_t maxFoldingExpansion = 4;
			std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
			const int lenSearch = static_cast<int>(
				pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
			while (forward ? (pos < endPos) : (pos >= endPos)) {
				int indexDocument = 0;
				int indexSearch = 0;
				bool characterMatches = true;
				while (characterMatches &&
					((pos + indexDocument) < limitPos) &&
					(indexSearch < lenSearch)) {
					char bytes[maxBytesCharacter + 1];
					bytes[0] = cb.CharAt(pos + indexDocument);
					const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
					if (widthChar == 2)
						bytes[1] = cb.CharAt(pos + indexDocument + 1);
					// Do not compare a character that would straddle the range limit.
					if ((pos + indexDocument + widthChar) > limitPos)
						break;
					char folded[maxBytesCharacter * maxFoldingExpansion + 1];
					const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
					folded[lenFlat] = 0;
					// Does folded match the buffer
					characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
					indexDocument += widthChar;
					indexSearch += lenFlat;
				}
				if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
					if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
						// Report the number of document bytes matched.
						*length = indexDocument;
						return pos;
					}
				}
				if (!NextCharacter(pos, increment))
					break;
			}
		} else {
			// Case insensitive, no multi-byte code page: fold and compare one byte at a time.
			const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
			std::vector<char> searchThing(lengthFind + 1);
			pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
			while (forward ? (pos < endSearch) : (pos >= endSearch)) {
				bool found = (pos + lengthFind) <= limitPos;
				for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
					char ch = CharAt(pos + indexSearch);
					char folded[2];
					pcf->Fold(folded, sizeof(folded), &ch, 1);
					found = folded[0] == searchThing[indexSearch];
				}
				if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
					return pos;
				}
				if (!NextCharacter(pos, increment))
					break;
			}
		}
	}
	//Platform::DebugPrintf("Not found\n");
	return -1;
}
1604
1605 const char *Document::SubstituteByPosition(const char *text, int *length) {
1606         if (regex)
1607                 return regex->SubstituteByPosition(this, text, length);
1608         else
1609                 return 0;
1610 }
1611
1612 int Document::LinesTotal() const {
1613         return cb.Lines();
1614 }
1615
1616 void Document::ChangeCase(Range r, bool makeUpperCase) {
1617         for (int pos = r.start; pos < r.end;) {
1618                 int len = LenChar(pos);
1619                 if (len == 1) {
1620                         char ch = CharAt(pos);
1621                         if (makeUpperCase) {
1622                                 if (IsLowerCase(ch)) {
1623                                         ChangeChar(pos, static_cast<char>(MakeUpperCase(ch)));
1624                                 }
1625                         } else {
1626                                 if (IsUpperCase(ch)) {
1627                                         ChangeChar(pos, static_cast<char>(MakeLowerCase(ch)));
1628                                 }
1629                         }
1630                 }
1631                 pos += len;
1632         }
1633 }
1634
1635 void Document::SetDefaultCharClasses(bool includeWordClass) {
1636     charClass.SetDefaultCharClasses(includeWordClass);
1637 }
1638
1639 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1640     charClass.SetCharClasses(chars, newCharClass);
1641 }
1642
1643 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1644     return charClass.GetCharsOfClass(characterClass, buffer);
1645 }
1646
1647 void Document::SetStylingBits(int bits) {
1648         stylingBits = bits;
1649         stylingBitsMask = (1 << stylingBits) - 1;
1650 }
1651
1652 void SCI_METHOD Document::StartStyling(int position, char mask) {
1653         stylingMask = mask;
1654         endStyled = position;
1655 }
1656
1657 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1658         if (enteredStyling != 0) {
1659                 return false;
1660         } else {
1661                 enteredStyling++;
1662                 style &= stylingMask;
1663                 int prevEndStyled = endStyled;
1664                 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1665                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1666                                            prevEndStyled, length);
1667                         NotifyModified(mh);
1668                 }
1669                 endStyled += length;
1670                 enteredStyling--;
1671                 return true;
1672         }
1673 }
1674
1675 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1676         if (enteredStyling != 0) {
1677                 return false;
1678         } else {
1679                 enteredStyling++;
1680                 bool didChange = false;
1681                 int startMod = 0;
1682                 int endMod = 0;
1683                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1684                         PLATFORM_ASSERT(endStyled < Length());
1685                         if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1686                                 if (!didChange) {
1687                                         startMod = endStyled;
1688                                 }
1689                                 didChange = true;
1690                                 endMod = endStyled;
1691                         }
1692                 }
1693                 if (didChange) {
1694                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1695                                            startMod, endMod - startMod + 1);
1696                         NotifyModified(mh);
1697                 }
1698                 enteredStyling--;
1699                 return true;
1700         }
1701 }
1702
1703 void Document::EnsureStyledTo(int pos) {
1704         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1705                 IncrementStyleClock();
1706                 if (pli && !pli->UseContainerLexing()) {
1707                         int lineEndStyled = LineFromPosition(GetEndStyled());
1708                         int endStyledTo = LineStart(lineEndStyled);
1709                         pli->Colourise(endStyledTo, pos);
1710                 } else {
1711                         // Ask the watchers to style, and stop as soon as one responds.
1712                         for (int i = 0; pos > GetEndStyled() && i < lenWatchers; i++) {
1713                                 watchers[i].watcher->NotifyStyleNeeded(this, watchers[i].userData, pos);
1714                         }
1715                 }
1716         }
1717 }
1718
1719 void Document::LexerChanged() {
1720         // Tell the watchers the lexer has changed.
1721         for (int i = 0; i < lenWatchers; i++) {
1722                 watchers[i].watcher->NotifyLexerChanged(this, watchers[i].userData);
1723         }
1724 }
1725
1726 int SCI_METHOD Document::SetLineState(int line, int state) {
1727         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1728         if (state != statePrevious) {
1729                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1730                 NotifyModified(mh);
1731         }
1732         return statePrevious;
1733 }
1734
1735 int SCI_METHOD Document::GetLineState(int line) const {
1736         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1737 }
1738
1739 int Document::GetMaxLineState() {
1740         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1741 }
1742
1743 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1744         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1745         NotifyModified(mh);
1746 }
1747
1748 StyledText Document::MarginStyledText(int line) {
1749         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1750         return StyledText(pla->Length(line), pla->Text(line),
1751                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1752 }
1753
1754 void Document::MarginSetText(int line, const char *text) {
1755         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1756         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1757         NotifyModified(mh);
1758 }
1759
1760 void Document::MarginSetStyle(int line, int style) {
1761         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1762         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1763 }
1764
1765 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1766         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1767         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1768 }
1769
1770 int Document::MarginLength(int line) const {
1771         return static_cast<LineAnnotation *>(perLineData[ldMargin])->Length(line);
1772 }
1773
1774 void Document::MarginClearAll() {
1775         int maxEditorLine = LinesTotal();
1776         for (int l=0; l<maxEditorLine; l++)
1777                 MarginSetText(l, 0);
1778         // Free remaining data
1779         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1780 }
1781
1782 bool Document::AnnotationAny() const {
1783         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->AnySet();
1784 }
1785
1786 StyledText Document::AnnotationStyledText(int line) {
1787         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1788         return StyledText(pla->Length(line), pla->Text(line),
1789                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1790 }
1791
1792 void Document::AnnotationSetText(int line, const char *text) {
1793         if (line >= 0 && line < LinesTotal()) {
1794                 const int linesBefore = AnnotationLines(line);
1795                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1796                 const int linesAfter = AnnotationLines(line);
1797                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1798                 mh.annotationLinesAdded = linesAfter - linesBefore;
1799                 NotifyModified(mh);
1800         }
1801 }
1802
1803 void Document::AnnotationSetStyle(int line, int style) {
1804         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1805         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1806         NotifyModified(mh);
1807 }
1808
1809 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1810         if (line >= 0 && line < LinesTotal()) {
1811                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1812         }
1813 }
1814
1815 int Document::AnnotationLength(int line) const {
1816         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Length(line);
1817 }
1818
1819 int Document::AnnotationLines(int line) const {
1820         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1821 }
1822
1823 void Document::AnnotationClearAll() {
1824         int maxEditorLine = LinesTotal();
1825         for (int l=0; l<maxEditorLine; l++)
1826                 AnnotationSetText(l, 0);
1827         // Free remaining data
1828         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1829 }
1830
1831 void Document::IncrementStyleClock() {
1832         styleClock = (styleClock + 1) % 0x100000;
1833 }
1834
1835 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1836         if (decorations.FillRange(position, value, fillLength)) {
1837                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1838                                                         position, fillLength);
1839                 NotifyModified(mh);
1840         }
1841 }
1842
1843 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1844         for (int i = 0; i < lenWatchers; i++) {
1845                 if ((watchers[i].watcher == watcher) &&
1846                         (watchers[i].userData == userData))
1847                         return false;
1848         }
1849         WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers + 1];
1850         for (int j = 0; j < lenWatchers; j++)
1851                 pwNew[j] = watchers[j];
1852         pwNew[lenWatchers].watcher = watcher;
1853         pwNew[lenWatchers].userData = userData;
1854         delete []watchers;
1855         watchers = pwNew;
1856         lenWatchers++;
1857         return true;
1858 }
1859
1860 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1861         for (int i = 0; i < lenWatchers; i++) {
1862                 if ((watchers[i].watcher == watcher) &&
1863                         (watchers[i].userData == userData)) {
1864                         if (lenWatchers == 1) {
1865                                 delete []watchers;
1866                                 watchers = 0;
1867                                 lenWatchers = 0;
1868                         } else {
1869                                 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers];
1870                                 for (int j = 0; j < lenWatchers - 1; j++) {
1871                                         pwNew[j] = (j < i) ? watchers[j] : watchers[j + 1];
1872                                 }
1873                                 delete []watchers;
1874                                 watchers = pwNew;
1875                                 lenWatchers--;
1876                         }
1877                         return true;
1878                 }
1879         }
1880         return false;
1881 }
1882
1883 void Document::NotifyModifyAttempt() {
1884         for (int i = 0; i < lenWatchers; i++) {
1885                 watchers[i].watcher->NotifyModifyAttempt(this, watchers[i].userData);
1886         }
1887 }
1888
1889 void Document::NotifySavePoint(bool atSavePoint) {
1890         for (int i = 0; i < lenWatchers; i++) {
1891                 watchers[i].watcher->NotifySavePoint(this, watchers[i].userData, atSavePoint);
1892         }
1893 }
1894
1895 void Document::NotifyModified(DocModification mh) {
1896         if (mh.modificationType & SC_MOD_INSERTTEXT) {
1897                 decorations.InsertSpace(mh.position, mh.length);
1898         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1899                 decorations.DeleteRange(mh.position, mh.length);
1900         }
1901         for (int i = 0; i < lenWatchers; i++) {
1902                 watchers[i].watcher->NotifyModified(this, mh, watchers[i].userData);
1903         }
1904 }
1905
1906 bool Document::IsWordPartSeparator(char ch) {
1907         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1908 }
1909
1910 int Document::WordPartLeft(int pos) {
1911         if (pos > 0) {
1912                 --pos;
1913                 char startChar = cb.CharAt(pos);
1914                 if (IsWordPartSeparator(startChar)) {
1915                         while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1916                                 --pos;
1917                         }
1918                 }
1919                 if (pos > 0) {
1920                         startChar = cb.CharAt(pos);
1921                         --pos;
1922                         if (IsLowerCase(startChar)) {
1923                                 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1924                                         --pos;
1925                                 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1926                                         ++pos;
1927                         } else if (IsUpperCase(startChar)) {
1928                                 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1929                                         --pos;
1930                                 if (!IsUpperCase(cb.CharAt(pos)))
1931                                         ++pos;
1932                         } else if (IsADigit(startChar)) {
1933                                 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1934                                         --pos;
1935                                 if (!IsADigit(cb.CharAt(pos)))
1936                                         ++pos;
1937                         } else if (IsPunctuation(startChar)) {
1938                                 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1939                                         --pos;
1940                                 if (!IsPunctuation(cb.CharAt(pos)))
1941                                         ++pos;
1942                         } else if (isspacechar(startChar)) {
1943                                 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1944                                         --pos;
1945                                 if (!isspacechar(cb.CharAt(pos)))
1946                                         ++pos;
1947                         } else if (!isascii(startChar)) {
1948                                 while (pos > 0 && !isascii(cb.CharAt(pos)))
1949                                         --pos;
1950                                 if (isascii(cb.CharAt(pos)))
1951                                         ++pos;
1952                         } else {
1953                                 ++pos;
1954                         }
1955                 }
1956         }
1957         return pos;
1958 }
1959
1960 int Document::WordPartRight(int pos) {
1961         char startChar = cb.CharAt(pos);
1962         int length = Length();
1963         if (IsWordPartSeparator(startChar)) {
1964                 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1965                         ++pos;
1966                 startChar = cb.CharAt(pos);
1967         }
1968         if (!isascii(startChar)) {
1969                 while (pos < length && !isascii(cb.CharAt(pos)))
1970                         ++pos;
1971         } else if (IsLowerCase(startChar)) {
1972                 while (pos < length && IsLowerCase(cb.CharAt(pos)))
1973                         ++pos;
1974         } else if (IsUpperCase(startChar)) {
1975                 if (IsLowerCase(cb.CharAt(pos + 1))) {
1976                         ++pos;
1977                         while (pos < length && IsLowerCase(cb.CharAt(pos)))
1978                                 ++pos;
1979                 } else {
1980                         while (pos < length && IsUpperCase(cb.CharAt(pos)))
1981                                 ++pos;
1982                 }
1983                 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
1984                         --pos;
1985         } else if (IsADigit(startChar)) {
1986                 while (pos < length && IsADigit(cb.CharAt(pos)))
1987                         ++pos;
1988         } else if (IsPunctuation(startChar)) {
1989                 while (pos < length && IsPunctuation(cb.CharAt(pos)))
1990                         ++pos;
1991         } else if (isspacechar(startChar)) {
1992                 while (pos < length && isspacechar(cb.CharAt(pos)))
1993                         ++pos;
1994         } else {
1995                 ++pos;
1996         }
1997         return pos;
1998 }
1999
/** @return true for line feed and carriage return, false for anything else. */
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2003
2004 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2005         int sStart = cb.StyleAt(pos);
2006         if (delta < 0) {
2007                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2008                         pos--;
2009                 pos++;
2010         } else {
2011                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2012                         pos++;
2013         }
2014         return pos;
2015 }
2016
/**
 * @return the brace that pairs with @a ch for the pairs (), [], {} and <>,
 * or '\0' when @a ch is not one of those eight characters.
 */
static char BraceOpposite(char ch) {
	// Openers sit at even indices with their closers directly after them.
	static const char pairs[] = "()[]{}<>";
	// Stop before the terminating NUL so that '\0' itself is never matched.
	for (unsigned int i = 0; i + 1 < sizeof(pairs); i += 2) {
		if (ch == pairs[i])
			return pairs[i + 1];
		if (ch == pairs[i + 1])
			return pairs[i];
	}
	return '\0';
}
2039
2040 // TODO: should be able to extend styled region to find matching brace
2041 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2042         char chBrace = CharAt(position);
2043         char chSeek = BraceOpposite(chBrace);
2044         if (chSeek == '\0')
2045                 return - 1;
2046         char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
2047         int direction = -1;
2048         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2049                 direction = 1;
2050         int depth = 1;
2051         position = NextPosition(position, direction);
2052         while ((position >= 0) && (position < Length())) {
2053                 char chAtPos = CharAt(position);
2054                 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
2055                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2056                         if (chAtPos == chBrace)
2057                                 depth++;
2058                         if (chAtPos == chSeek)
2059                                 depth--;
2060                         if (depth == 0)
2061                                 return position;
2062                 }
2063                 int positionBeforeMove = position;
2064                 position = NextPosition(position, direction);
2065                 if (position == positionBeforeMove)
2066                         break;
2067         }
2068         return - 1;
2069 }
2070
2071 /**
2072  * Implementation of RegexSearchBase for the default built-in regular expression engine
2073  */
2074 class BuiltinRegex : public RegexSearchBase {
2075 public:
2076         BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {}
2077
2078         virtual ~BuiltinRegex() {
2079                 delete substituted;
2080         }
2081
2082         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2083                         bool caseSensitive, bool word, bool wordStart, int flags,
2084                         int *length);
2085
2086         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2087
2088 private:
2089         RESearch search;
2090         char *substituted;
2091 };
2092
2093 // Define a way for the Regular Expression code to access the document
2094 class DocumentIndexer : public CharacterIndexer {
2095         Document *pdoc;
2096         int end;
2097 public:
2098         DocumentIndexer(Document *pdoc_, int end_) :
2099                 pdoc(pdoc_), end(end_) {
2100         }
2101
2102         virtual ~DocumentIndexer() {
2103         }
2104
2105         virtual char CharAt(int index) {
2106                 if (index < 0 || index >= end)
2107                         return 0;
2108                 else
2109                         return pdoc->CharAt(index);
2110         }
2111 };
2112
2113 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2114                         bool caseSensitive, bool, bool, int flags,
2115                         int *length) {
2116         bool posix = (flags & SCFIND_POSIX) != 0;
2117         int increment = (minPos <= maxPos) ? 1 : -1;
2118
2119         int startPos = minPos;
2120         int endPos = maxPos;
2121
2122         // Range endpoints should not be inside DBCS characters, but just in case, move them.
2123         startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2124         endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2125
2126         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2127         if (errmsg) {
2128                 return -1;
2129         }
2130         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2131         // Replace first '.' with '-' in each property file variable reference:
2132         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2133         //     Replace: $(\1-\2)
2134         int lineRangeStart = doc->LineFromPosition(startPos);
2135         int lineRangeEnd = doc->LineFromPosition(endPos);
2136         if ((increment == 1) &&
2137                 (startPos >= doc->LineEnd(lineRangeStart)) &&
2138                 (lineRangeStart < lineRangeEnd)) {
2139                 // the start position is at end of line or between line end characters.
2140                 lineRangeStart++;
2141                 startPos = doc->LineStart(lineRangeStart);
2142         } else if ((increment == -1) &&
2143                    (startPos <= doc->LineStart(lineRangeStart)) &&
2144                    (lineRangeStart > lineRangeEnd)) {
2145                 // the start position is at beginning of line.
2146                 lineRangeStart--;
2147                 startPos = doc->LineEnd(lineRangeStart);
2148         }
2149         int pos = -1;
2150         int lenRet = 0;
2151         char searchEnd = s[*length - 1];
2152         char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2153         int lineRangeBreak = lineRangeEnd + increment;
2154         for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2155                 int startOfLine = doc->LineStart(line);
2156                 int endOfLine = doc->LineEnd(line);
2157                 if (increment == 1) {
2158                         if (line == lineRangeStart) {
2159                                 if ((startPos != startOfLine) && (s[0] == '^'))
2160                                         continue;       // Can't match start of line if start position after start of line
2161                                 startOfLine = startPos;
2162                         }
2163                         if (line == lineRangeEnd) {
2164                                 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2165                                         continue;       // Can't match end of line if end position before end of line
2166                                 endOfLine = endPos;
2167                         }
2168                 } else {
2169                         if (line == lineRangeEnd) {
2170                                 if ((endPos != startOfLine) && (s[0] == '^'))
2171                                         continue;       // Can't match start of line if end position after start of line
2172                                 startOfLine = endPos;
2173                         }
2174                         if (line == lineRangeStart) {
2175                                 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2176                                         continue;       // Can't match end of line if start position before end of line
2177                                 endOfLine = startPos;
2178                         }
2179                 }
2180
2181                 DocumentIndexer di(doc, endOfLine);
2182                 int success = search.Execute(di, startOfLine, endOfLine);
2183                 if (success) {
2184                         pos = search.bopat[0];
2185                         lenRet = search.eopat[0] - search.bopat[0];
2186                         // There can be only one start of a line, so no need to look for last match in line
2187                         if ((increment == -1) && (s[0] != '^')) {
2188                                 // Check for the last match on this line.
2189                                 int repetitions = 1000; // Break out of infinite loop
2190                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2191                                         success = search.Execute(di, pos+1, endOfLine);
2192                                         if (success) {
2193                                                 if (search.eopat[0] <= minPos) {
2194                                                         pos = search.bopat[0];
2195                                                         lenRet = search.eopat[0] - search.bopat[0];
2196                                                 } else {
2197                                                         success = 0;
2198                                                 }
2199                                         }
2200                                 }
2201                         }
2202                         break;
2203                 }
2204         }
2205         *length = lenRet;
2206         return pos;
2207 }
2208
2209 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2210         delete []substituted;
2211         substituted = 0;
2212         DocumentIndexer di(doc, doc->Length());
2213         if (!search.GrabMatches(di))
2214                 return 0;
2215         unsigned int lenResult = 0;
2216         for (int i = 0; i < *length; i++) {
2217                 if (text[i] == '\\') {
2218                         if (text[i + 1] >= '0' && text[i + 1] <= '9') {
2219                                 unsigned int patNum = text[i + 1] - '0';
2220                                 lenResult += search.eopat[patNum] - search.bopat[patNum];
2221                                 i++;
2222                         } else {
2223                                 switch (text[i + 1]) {
2224                                 case 'a':
2225                                 case 'b':
2226                                 case 'f':
2227                                 case 'n':
2228                                 case 'r':
2229                                 case 't':
2230                                 case 'v':
2231                                 case '\\':
2232                                         i++;
2233                                 }
2234                                 lenResult++;
2235                         }
2236                 } else {
2237                         lenResult++;
2238                 }
2239         }
2240         substituted = new char[lenResult + 1];
2241         char *o = substituted;
2242         for (int j = 0; j < *length; j++) {
2243                 if (text[j] == '\\') {
2244                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2245                                 unsigned int patNum = text[j + 1] - '0';
2246                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2247                                 if (search.pat[patNum]) // Will be null if try for a match that did not occur
2248                                         memcpy(o, search.pat[patNum], len);
2249                                 o += len;
2250                                 j++;
2251                         } else {
2252                                 j++;
2253                                 switch (text[j]) {
2254                                 case 'a':
2255                                         *o++ = '\a';
2256                                         break;
2257                                 case 'b':
2258                                         *o++ = '\b';
2259                                         break;
2260                                 case 'f':
2261                                         *o++ = '\f';
2262                                         break;
2263                                 case 'n':
2264                                         *o++ = '\n';
2265                                         break;
2266                                 case 'r':
2267                                         *o++ = '\r';
2268                                         break;
2269                                 case 't':
2270                                         *o++ = '\t';
2271                                         break;
2272                                 case 'v':
2273                                         *o++ = '\v';
2274                                         break;
2275                                 case '\\':
2276                                         *o++ = '\\';
2277                                         break;
2278                                 default:
2279                                         *o++ = '\\';
2280                                         j--;
2281                                 }
2282                         }
2283                 } else {
2284                         *o++ = text[j];
2285                 }
2286         }
2287         *o = '\0';
2288         *length = lenResult;
2289         return substituted;
2290 }
2291
2292 #ifndef SCI_OWNREGEX
2293
2294 #ifdef SCI_NAMESPACE
2295
2296 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2297         return new BuiltinRegex(charClassTable);
2298 }
2299
2300 #else
2301
2302 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2303         return new BuiltinRegex(charClassTable);
2304 }
2305
2306 #endif
2307
2308 #endif