scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <assert.h>
  12 #include <ctype.h>
  13
  14 #include <string>
  15 #include <vector>
  16 #include <algorithm>
  17
  18 #include "Platform.h"
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22
  23 #include "CharacterSet.h"
  24 #include "SplitVector.h"
  25 #include "Partitioning.h"
  26 #include "RunStyles.h"
  27 #include "CellBuffer.h"
  28 #include "PerLine.h"
  29 #include "CharClassify.h"
  30 #include "Decoration.h"
  31 #include "CaseFolder.h"
  32 #include "Document.h"
  33 #include "RESearch.h"
  34 #include "UniConversion.h"
  35
  36 #ifdef SCI_NAMESPACE
  37 using namespace Scintilla;
  38 #endif
  39
  40 static inline bool IsPunctuation(char ch) {
  41         return IsASCII(ch) && ispunct(ch);
  42 }
  43
  44 void LexInterface::Colourise(int start, int end) {
  45         if (pdoc && instance && !performingStyle) {
  46                 // Protect against reentrance, which may occur, for example, when
  47                 // fold points are discovered while performing styling and the folding
  48                 // code looks for child lines which may trigger styling.
  49                 performingStyle = true;
  50
  51                 int lengthDoc = pdoc->Length();
  52                 if (end == -1)
  53                         end = lengthDoc;
  54                 int len = end - start;
  55
  56                 PLATFORM_ASSERT(len >= 0);
  57                 PLATFORM_ASSERT(start + len <= lengthDoc);
  58
  59                 int styleStart = 0;
  60                 if (start > 0)
  61                         styleStart = pdoc->StyleAt(start - 1);
  62
  63                 if (len > 0) {
  64                         instance->Lex(start, len, styleStart, pdoc);
  65                         instance->Fold(start, len, styleStart, pdoc);
  66                 }
  67
  68                 performingStyle = false;
  69         }
  70 }
  71
  72 int LexInterface::LineEndTypesSupported() {
  73         if (instance) {
  74                 int interfaceVersion = instance->Version();
  75                 if (interfaceVersion >= lvSubStyles) {
  76                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  77                         return ssinstance->LineEndTypesSupported();
  78                 }
  79         }
  80         return 0;
  81 }
  82
  83 Document::Document() {
  84         refCount = 0;
  85         pcf = NULL;
  86 #ifdef _WIN32
  87         eolMode = SC_EOL_CRLF;
  88 #else
  89         eolMode = SC_EOL_LF;
  90 #endif
  91         dbcsCodePage = 0;
  92         lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
  93         endStyled = 0;
  94         styleClock = 0;
  95         enteredModification = 0;
  96         enteredStyling = 0;
  97         enteredReadOnlyCount = 0;
  98         insertionSet = false;
  99         tabInChars = 8;
 100         indentInChars = 0;
 101         actualIndentInChars = 8;
 102         useTabs = true;
 103         tabIndents = true;
 104         backspaceUnindents = false;
 105
 106         matchesValid = false;
 107         regex = 0;
 108
 109         UTF8BytesOfLeadInitialise();
 110
 111         perLineData[ldMarkers] = new LineMarkers();
 112         perLineData[ldLevels] = new LineLevels();
 113         perLineData[ldState] = new LineState();
 114         perLineData[ldMargin] = new LineAnnotation();
 115         perLineData[ldAnnotation] = new LineAnnotation();
 116
 117         cb.SetPerLine(this);
 118
 119         pli = 0;
 120 }
 121
 122 Document::~Document() {
 123         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 124                 it->watcher->NotifyDeleted(this, it->userData);
 125         }
 126         for (int j=0; j<ldSize; j++) {
 127                 delete perLineData[j];
 128                 perLineData[j] = 0;
 129         }
 130         delete regex;
 131         regex = 0;
 132         delete pli;
 133         pli = 0;
 134         delete pcf;
 135         pcf = 0;
 136 }
 137
 138 void Document::Init() {
 139         for (int j=0; j<ldSize; j++) {
 140                 if (perLineData[j])
 141                         perLineData[j]->Init();
 142         }
 143 }
 144
 145 int Document::LineEndTypesSupported() const {
 146         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 147                 return pli->LineEndTypesSupported();
 148         else
 149                 return 0;
 150 }
 151
 152 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 153         if (dbcsCodePage != dbcsCodePage_) {
 154                 dbcsCodePage = dbcsCodePage_;
 155                 SetCaseFolder(NULL);
 156                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 157                 return true;
 158         } else {
 159                 return false;
 160         }
 161 }
 162
 163 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 164         if (lineEndBitSet != lineEndBitSet_) {
 165                 lineEndBitSet = lineEndBitSet_;
 166                 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 167                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 168                         ModifiedAt(0);
 169                         cb.SetLineEndTypes(lineEndBitSetActive);
 170                         return true;
 171                 } else {
 172                         return false;
 173                 }
 174         } else {
 175                 return false;
 176         }
 177 }
 178
 179 void Document::InsertLine(int line) {
 180         for (int j=0; j<ldSize; j++) {
 181                 if (perLineData[j])
 182                         perLineData[j]->InsertLine(line);
 183         }
 184 }
 185
 186 void Document::RemoveLine(int line) {
 187         for (int j=0; j<ldSize; j++) {
 188                 if (perLineData[j])
 189                         perLineData[j]->RemoveLine(line);
 190         }
 191 }
 192
 193 // Increase reference count and return its previous value.
 194 int Document::AddRef() {
 195         return refCount++;
 196 }
 197
// Decrease reference count and return its new value (the count after the
// decrement), unlike AddRef which returns the value before the change.
// Delete the document if reference count reaches zero.
int SCI_METHOD Document::Release() {
        // Keep the result in a local: members must not be read after 'delete this'.
        int curRefCount = --refCount;
        if (curRefCount == 0)
                delete this;
        return curRefCount;
}
 206
 207 void Document::SetSavePoint() {
 208         cb.SetSavePoint();
 209         NotifySavePoint(true);
 210 }
 211
// Undo the steps counted by cb.TentativeSteps(), sending a "before"
// and an "after" modification notification for each step, then call
// cb.TentativeCommit().
// CheckReadOnly() is always called; the rest is skipped when a modification
// is already in progress or the buffer is read-only.
void Document::TentativeUndo() {
        CheckReadOnly();
        if (enteredModification == 0) {
                // Held non-zero for the duration so nested calls take no action.
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        bool multiLine = false;
                        int steps = cb.TentativeSteps();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetUndoStep();
                                // "Before" notification: undoing a removal re-inserts text,
                                // undoing anything else except a container action deletes text.
                                if (action.at == removeAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
                                } else if (action.at == containerAction) {
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
                                }
                                cb.PerformUndoStep();
                                // Container actions do not change text, so they do not call ModifiedAt.
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                }

                                int modFlags = SC_PERFORMED_UNDO;
                                // With undo, an insertion action becomes a deletion notification
                                if (action.at == removeAction) {
                                        modFlags |= SC_MOD_INSERTTEXT;
                                } else if (action.at == insertAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                // Remember whether any step so far changed the line count.
                                if (linesAdded != 0)
                                        multiLine = true;
                                // Only the final step carries the "last step" and "multi-line" flags.
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                                                                           linesAdded, action.data));
                        }

                        // Report a save point transition only if the undo changed it.
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);

                        cb.TentativeCommit();
                }
                enteredModification--;
        }
}
 270
 271 int Document::GetMark(int line) {
 272         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 273 }
 274
 275 int Document::MarkerNext(int lineStart, int mask) const {
 276         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 277 }
 278
 279 int Document::AddMark(int line, int markerNum) {
 280         if (line >= 0 && line <= LinesTotal()) {
 281                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 282                         AddMark(line, markerNum, LinesTotal());
 283                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 284                 NotifyModified(mh);
 285                 return prev;
 286         } else {
 287                 return 0;
 288         }
 289 }
 290
 291 void Document::AddMarkSet(int line, int valueSet) {
 292         if (line < 0 || line > LinesTotal()) {
 293                 return;
 294         }
 295         unsigned int m = valueSet;
 296         for (int i = 0; m; i++, m >>= 1)
 297                 if (m & 1)
 298                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 299                                 AddMark(line, i, LinesTotal());
 300         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 301         NotifyModified(mh);
 302 }
 303
 304 void Document::DeleteMark(int line, int markerNum) {
 305         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 306         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 307         NotifyModified(mh);
 308 }
 309
 310 void Document::DeleteMarkFromHandle(int markerHandle) {
 311         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 312         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 313         mh.line = -1;
 314         NotifyModified(mh);
 315 }
 316
 317 void Document::DeleteAllMarks(int markerNum) {
 318         bool someChanges = false;
 319         for (int line = 0; line < LinesTotal(); line++) {
 320                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 321                         someChanges = true;
 322         }
 323         if (someChanges) {
 324                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 325                 mh.line = -1;
 326                 NotifyModified(mh);
 327         }
 328 }
 329
 330 int Document::LineFromHandle(int markerHandle) {
 331         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 332 }
 333
 334 int SCI_METHOD Document::LineStart(int line) const {
 335         return cb.LineStart(line);
 336 }
 337
 338 int SCI_METHOD Document::LineEnd(int line) const {
 339         if (line >= LinesTotal() - 1) {
 340                 return LineStart(line + 1);
 341         } else {
 342                 int position = LineStart(line + 1);
 343                 if (SC_CP_UTF8 == dbcsCodePage) {
 344                         unsigned char bytes[] = {
 345                                 static_cast<unsigned char>(cb.CharAt(position-3)),
 346                                 static_cast<unsigned char>(cb.CharAt(position-2)),
 347                                 static_cast<unsigned char>(cb.CharAt(position-1)),
 348                         };
 349                         if (UTF8IsSeparator(bytes)) {
 350                                 return position - UTF8SeparatorLength;
 351                         }
 352                         if (UTF8IsNEL(bytes+1)) {
 353                                 return position - UTF8NELLength;
 354                         }
 355                 }
 356                 position--; // Back over CR or LF
 357                 // When line terminator is CR+LF, may need to go back one more
 358                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 359                         position--;
 360                 }
 361                 return position;
 362         }
 363 }
 364
 365 void SCI_METHOD Document::SetErrorStatus(int status) {
 366         // Tell the watchers an error has occurred.
 367         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 368                 it->watcher->NotifyErrorOccurred(this, it->userData, status);
 369         }
 370 }
 371
 372 int SCI_METHOD Document::LineFromPosition(int pos) const {
 373         return cb.LineFromPosition(pos);
 374 }
 375
 376 int Document::LineEndPosition(int position) const {
 377         return LineEnd(LineFromPosition(position));
 378 }
 379
 380 bool Document::IsLineEndPosition(int position) const {
 381         return LineEnd(LineFromPosition(position)) == position;
 382 }
 383
 384 bool Document::IsPositionInLineEnd(int position) const {
 385         return position >= LineEnd(LineFromPosition(position));
 386 }
 387
 388 int Document::VCHomePosition(int position) const {
 389         int line = LineFromPosition(position);
 390         int startPosition = LineStart(line);
 391         int endLine = LineEnd(line);
 392         int startText = startPosition;
 393         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 394                 startText++;
 395         if (position == startText)
 396                 return startPosition;
 397         else
 398                 return startText;
 399 }
 400
 401 int SCI_METHOD Document::SetLevel(int line, int level) {
 402         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 403         if (prev != level) {
 404                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 405                                    LineStart(line), 0, 0, 0, line);
 406                 mh.foldLevelNow = level;
 407                 mh.foldLevelPrev = prev;
 408                 NotifyModified(mh);
 409         }
 410         return prev;
 411 }
 412
 413 int SCI_METHOD Document::GetLevel(int line) const {
 414         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 415 }
 416
 417 void Document::ClearLevels() {
 418         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 419 }
 420
 421 static bool IsSubordinate(int levelStart, int levelTry) {
 422         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 423                 return true;
 424         else
 425                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 426 }
 427
 428 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 429         if (level == -1)
 430                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 431         int maxLine = LinesTotal();
 432         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 433         int lineMaxSubord = lineParent;
 434         while (lineMaxSubord < maxLine - 1) {
 435                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 436                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 437                         break;
 438                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 439                         break;
 440                 lineMaxSubord++;
 441         }
 442         if (lineMaxSubord > lineParent) {
 443                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 444                         // Have chewed up some whitespace that belongs to a parent so seek back
 445                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 446                                 lineMaxSubord--;
 447                         }
 448                 }
 449         }
 450         return lineMaxSubord;
 451 }
 452
 453 int Document::GetFoldParent(int line) const {
 454         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 455         int lineLook = line - 1;
 456         while ((lineLook > 0) && (
 457                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 458                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 459               ) {
 460                 lineLook--;
 461         }
 462         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 463                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 464                 return lineLook;
 465         } else {
 466                 return -1;
 467         }
 468 }
 469
// Compute the fold block around 'line' and store it in highlightDelimiter:
// beginFoldBlock / endFoldBlock delimit the block, and
// firstChangeableLineBefore / firstChangeableLineAfter are the nearest lines
// before and after 'line' selected by the level tests below.
// lastLine is passed on (plus one, and never less than line + 1) as the
// look-ahead limit for GetLastChild.
// If no enclosing fold header is found, highlightDelimiter is cleared.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
        int level = GetLevel(line);
        int lookLastLine = Platform::Maximum(line, lastLine) + 1;

        // Walk upwards past white-space lines and past headers whose level number
        // is not below that of the line following them.
        int lookLine = line;
        int lookLineLevel = level;
        int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
        while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
                ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
                lookLineLevel = GetLevel(--lookLine);
                lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
        }

        // The block starts at the line reached if it is a header, otherwise at its fold parent.
        int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
        if (beginFoldBlock == -1) {
                highlightDelimiter.Clear();
                return;
        }

        int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
        int firstChangeableLineBefore = -1;
        if (endFoldBlock < line) {
                // The block found ends before 'line': look further up for a header
                // whose last child is exactly 'line'.
                lookLine = beginFoldBlock - 1;
                lookLineLevel = GetLevel(lookLine);
                lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
                        if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
                                if (GetLastChild(lookLine, -1, lookLastLine) == line) {
                                        beginFoldBlock = lookLine;
                                        endFoldBlock = line;
                                        firstChangeableLineBefore = line - 1;
                                }
                        }
                        // Stop at a base-level line whose predecessor has a higher level number.
                        if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
                                break;
                        lookLineLevel = GetLevel(--lookLine);
                        lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                }
        }
        if (firstChangeableLineBefore == -1) {
                // Scan backwards from the line above 'line' to the block start for a
                // white-space line or one with a higher level number than 'line'.
                for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                        lookLine >= beginFoldBlock;
                        lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
                        if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
                                firstChangeableLineBefore = lookLine;
                                break;
                        }
                }
        }
        // Fallback: the line just before the block.
        if (firstChangeableLineBefore == -1)
                firstChangeableLineBefore = beginFoldBlock - 1;

        // Scan forwards from the line below 'line' to the block end for a header
        // whose level number is below that of the line following it.
        int firstChangeableLineAfter = -1;
        for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                lookLine <= endFoldBlock;
                lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
                if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
                        firstChangeableLineAfter = lookLine;
                        break;
                }
        }
        // Fallback: the line just after the block.
        if (firstChangeableLineAfter == -1)
                firstChangeableLineAfter = endFoldBlock + 1;

        highlightDelimiter.beginFoldBlock = beginFoldBlock;
        highlightDelimiter.endFoldBlock = endFoldBlock;
        highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
        highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
 539
 540 int Document::ClampPositionIntoDocument(int pos) const {
 541         return Platform::Clamp(pos, 0, Length());
 542 }
 543
 544 bool Document::IsCrLf(int pos) const {
 545         if (pos < 0)
 546                 return false;
 547         if (pos >= (Length() - 1))
 548                 return false;
 549         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 550 }
 551
 552 int Document::LenChar(int pos) {
 553         if (pos < 0) {
 554                 return 1;
 555         } else if (IsCrLf(pos)) {
 556                 return 2;
 557         } else if (SC_CP_UTF8 == dbcsCodePage) {
 558                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 559                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 560                 int lengthDoc = Length();
 561                 if ((pos + widthCharBytes) > lengthDoc)
 562                         return lengthDoc - pos;
 563                 else
 564                         return widthCharBytes;
 565         } else if (dbcsCodePage) {
 566                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 567         } else {
 568                 return 1;
 569         }
 570 }
 571
// Test whether pos lies within a multi-byte UTF-8 character that
// UTF8Classify does not flag as invalid.
// start is always written: it receives the position of the candidate lead byte.
// end is written only when true is returned: one past the character's last byte.
bool Document::InGoodUTF8(int pos, int &start, int &end) const {
        // Step back over trail bytes, at most UTF8MaxBytes of them.
        int trail = pos;
        while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
                trail--;
        // The byte before the run of trail bytes is the candidate lead byte.
        start = (trail > 0) ? trail-1 : trail;

        const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
        const int widthCharBytes = UTF8BytesOfLead[leadByte];
        if (widthCharBytes == 1) {
                // A width of one means there is no multi-byte character here.
                return false;
        } else {
                int trailBytes = widthCharBytes - 1;
                int len = pos - start;
                if (len > trailBytes)
                        // pos too far from lead
                        return false;
                // Copy the character's bytes; bytes beyond the document end stay zero.
                char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
                for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
                        charBytes[b] = cb.CharAt(static_cast<int>(start+b));
                int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
                if (utf8status & UTF8MaskInvalid)
                        return false;
                end = start + widthCharBytes;
                return true;
        }
}
 598
// Normalise a position so that it is not part way through a multi-byte character.
// This can occur in these situations -
// When lines are terminated with \r\n pairs which should be treated as one character
// (only checked when checkLineEnd is true).
// When the text is UTF-8 and the position is on a trail byte of a valid character.
// When displaying DBCS text such as Japanese.
// If the position must move, it moves forwards when moveDir > 0 and backwards otherwise.
// Positions at or outside the document bounds are returned as 0 or Length().
int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
        //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
        // If out of range, just return minimum/maximum value.
        if (pos <= 0)
                return 0;
        if (pos >= Length())
                return Length();

        // PLATFORM_ASSERT(pos > 0 && pos < Length());
        // pos sits between the CR and the LF of a pair: step to one side of it.
        if (checkLineEnd && IsCrLf(pos - 1)) {
                if (moveDir > 0)
                        return pos + 1;
                else
                        return pos - 1;
        }

        if (dbcsCodePage) {
                if (SC_CP_UTF8 == dbcsCodePage) {
                        unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
                        // If ch is not a trail byte then pos is valid intercharacter position
                        if (UTF8IsTrailByte(ch)) {
                                int startUTF = pos;
                                int endUTF = pos;
                                if (InGoodUTF8(pos, startUTF, endUTF)) {
                                        // ch is a trail byte within a UTF-8 character
                                        if (moveDir > 0)
                                                pos = endUTF;
                                        else
                                                pos = startUTF;
                                }
                                // Else invalid UTF-8 so return position of isolated trail byte
                        }
                } else {
                        // Anchor DBCS calculations at start of line because start of line can
                        // not be a DBCS trail byte.
                        int posStartLine = LineStart(LineFromPosition(pos));
                        if (pos == posStartLine)
                                return pos;

                        // Step back until a non-lead-byte is found.
                        int posCheck = pos;
                        while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
                                posCheck--;

                        // Check from known start of character.
                        while (posCheck < pos) {
                                int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
                                if (posCheck + mbsize == pos) {
                                        // A character ends exactly at pos, so pos is a boundary.
                                        return pos;
                                } else if (posCheck + mbsize > pos) {
                                        // pos falls inside this character: move to one of its edges.
                                        if (moveDir > 0) {
                                                return posCheck + mbsize;
                                        } else {
                                                return posCheck;
                                        }
                                }
                                posCheck += mbsize;
                        }
                }
        }

        return pos;
}
 667
 668 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 669 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 670 // A \r\n pair is treated as two characters.
// Find the character boundary adjacent to pos in the direction of moveDir
// (forwards when moveDir > 0, backwards otherwise) for the current encoding.
// When a single byte step from pos would reach or pass either end of the
// document, 0 or Length() is returned instead.
int Document::NextPosition(int pos, int moveDir) const {
        // If out of range, just return minimum/maximum value.
        int increment = (moveDir > 0) ? 1 : -1;
        if (pos + increment <= 0)
                return 0;
        if (pos + increment >= Length())
                return Length();

        if (dbcsCodePage) {
                if (SC_CP_UTF8 == dbcsCodePage) {
                        if (increment == 1) {
                                // Simple forward movement case so can avoid some checks
                                const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
                                if (UTF8IsAscii(leadByte)) {
                                        // Single byte character or invalid
                                        pos++;
                                } else {
                                        // Gather the lead byte and the following bytes, as many as the
                                        // UTF8BytesOfLead table gives for this lead byte, then classify them.
                                        const int widthCharBytes = UTF8BytesOfLead[leadByte];
                                        char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
                                        for (int b=1; b<widthCharBytes; b++)
                                                charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
                                        int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
                                        // An invalid sequence is stepped over one byte at a time,
                                        // otherwise move by the width reported by the classifier.
                                        if (utf8status & UTF8MaskInvalid)
                                                pos++;
                                        else
                                                pos += utf8status & UTF8MaskWidth;
                                }
                        } else {
                                // Examine byte before position
                                pos--;
                                unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
                                // If ch is not a trail byte then pos is valid intercharacter position
                                if (UTF8IsTrailByte(ch)) {
                                        // If ch is a trail byte in a valid UTF-8 character then return start of character
                                        int startUTF = pos;
                                        int endUTF = pos;
                                        if (InGoodUTF8(pos, startUTF, endUTF)) {
                                                pos = startUTF;
                                        }
                                        // Else invalid UTF-8 so return position of isolated trail byte
                                }
                        }
                } else {
                        if (moveDir > 0) {
                                // DBCS forwards: a lead byte means a two byte character
                                int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
                                pos += mbsize;
                                // A lead byte as the final byte must not move past the end
                                if (pos > Length())
                                        pos = Length();
                        } else {
                                // Anchor DBCS calculations at start of line because start of line can
                                // not be a DBCS trail byte.
                                int posStartLine = LineStart(LineFromPosition(pos));
                                // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
                                // http://msdn.microsoft.com/en-us/library/cc194790.aspx
                                if ((pos - 1) <= posStartLine) {
                                        return pos - 1;
                                } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
                                        // Must actually be trail byte
                                        return pos - 2;
                                } else {
                                        // Otherwise, step back until a non-lead-byte is found.
                                        int posTemp = pos - 1;
                                        while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
                                                ;
                                        // Now posTemp+1 must point to the beginning of a character,
                                        // so figure out whether we went back an even or an odd
                                        // number of bytes and go back 1 or 2 bytes, respectively.
                                        return (pos - 1 - ((pos - posTemp) & 1));
                                }
                        }
                }
        } else {
                // Single byte encoding: every byte is a character
                pos += increment;
        }

        return pos;
}
 748
 749 bool Document::NextCharacter(int &pos, int moveDir) const {
 750         // Returns true if pos changed
 751         int posNext = NextPosition(pos, moveDir);
 752         if (posNext == pos) {
 753                 return false;
 754         } else {
 755                 pos = posNext;
 756                 return true;
 757         }
 758 }
 759
 760 static inline int UnicodeFromBytes(const unsigned char *us) {
 761         if (us[0] < 0xC2) {
 762                 return us[0];
 763         } else if (us[0] < 0xE0) {
 764                 return ((us[0] & 0x1F) << 6) + (us[1] & 0x3F);
 765         } else if (us[0] < 0xF0) {
 766                 return ((us[0] & 0xF) << 12) + ((us[1] & 0x3F) << 6) + (us[2] & 0x3F);
 767         } else if (us[0] < 0xF5) {
 768                 return ((us[0] & 0x7) << 18) + ((us[1] & 0x3F) << 12) + ((us[2] & 0x3F) << 6) + (us[3] & 0x3F);
 769         }
 770         return us[0];
 771 }
 772
 773 // Return -1  on out-of-bounds
 774 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
 775         int pos = positionStart;
 776         if (dbcsCodePage) {
 777                 const int increment = (characterOffset > 0) ? 1 : -1;
 778                 while (characterOffset != 0) {
 779                         const int posNext = NextPosition(pos, increment);
 780                         if (posNext == pos)
 781                                 return INVALID_POSITION;
 782                         pos = posNext;
 783                         characterOffset -= increment;
 784                 }
 785         } else {
 786                 pos = positionStart + characterOffset;
 787                 if ((pos < 0) || (pos > Length()))
 788                         return INVALID_POSITION;
 789         }
 790         return pos;
 791 }
 792
 793 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
 794         int character;
 795         int bytesInCharacter = 1;
 796         if (dbcsCodePage) {
 797                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 798                 if (SC_CP_UTF8 == dbcsCodePage) {
 799                         if (UTF8IsAscii(leadByte)) {
 800                                 // Single byte character or invalid
 801                                 character =  leadByte;
 802                         } else {
 803                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 804                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 805                                 for (int b=1; b<widthCharBytes; b++)
 806                                         charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 807                                 int utf8status = UTF8Classify(charBytes, widthCharBytes);
 808                                 if (utf8status & UTF8MaskInvalid) {
 809                                         // Report as singleton surrogate values which are invalid Unicode
 810                                         character =  0xDC80 + leadByte;
 811                                 } else {
 812                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 813                                         character = UnicodeFromBytes(charBytes);
 814                                 }
 815                         }
 816                 } else {
 817                         if (IsDBCSLeadByte(leadByte)) {
 818                                 bytesInCharacter = 2;
 819                                 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 820                         } else {
 821                                 character = leadByte;
 822                         }
 823                 }
 824         } else {
 825                 character = cb.CharAt(position);
 826         }
 827         if (pWidth) {
 828                 *pWidth = bytesInCharacter;
 829         }
 830         return character;
 831 }
 832
 833 int SCI_METHOD Document::CodePage() const {
 834         return dbcsCodePage;
 835 }
 836
 837 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 838         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 839         unsigned char uch = static_cast<unsigned char>(ch);
 840         switch (dbcsCodePage) {
 841                 case 932:
 842                         // Shift_jis
 843                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 844                                 ((uch >= 0xE0) && (uch <= 0xFC));
 845                                 // Lead bytes F0 to FC may be a Microsoft addition.
 846                 case 936:
 847                         // GBK
 848                         return (uch >= 0x81) && (uch <= 0xFE);
 849                 case 949:
 850                         // Korean Wansung KS C-5601-1987
 851                         return (uch >= 0x81) && (uch <= 0xFE);
 852                 case 950:
 853                         // Big5
 854                         return (uch >= 0x81) && (uch <= 0xFE);
 855                 case 1361:
 856                         // Korean Johab KS C-5601-1992
 857                         return
 858                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 859                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 860                                 ((uch >= 0xE0) && (uch <= 0xF9));
 861         }
 862         return false;
 863 }
 864
 865 static inline bool IsSpaceOrTab(int ch) {
 866         return ch == ' ' || ch == '\t';
 867 }
 868
 869 // Need to break text into segments near lengthSegment but taking into
 870 // account the encoding to not break inside a UTF-8 or DBCS character
 871 // and also trying to avoid breaking inside a pair of combining characters.
 872 // The segment length must always be long enough (more than 4 bytes)
 873 // so that there will be at least one whole character to make a segment.
 874 // For UTF-8, text must consist only of valid whole characters.
 875 // In preference order from best to worst:
 876 //   1) Break after space
 877 //   2) Break before punctuation
 878 //   3) Break after whole character
 879
 880 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
 881         if (length <= lengthSegment)
 882                 return length;
 883         int lastSpaceBreak = -1;
 884         int lastPunctuationBreak = -1;
 885         int lastEncodingAllowedBreak = 0;
 886         for (int j=0; j < lengthSegment;) {
 887                 unsigned char ch = static_cast<unsigned char>(text[j]);
 888                 if (j > 0) {
 889                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 890                                 lastSpaceBreak = j;
 891                         }
 892                         if (ch < 'A') {
 893                                 lastPunctuationBreak = j;
 894                         }
 895                 }
 896                 lastEncodingAllowedBreak = j;
 897
 898                 if (dbcsCodePage == SC_CP_UTF8) {
 899                         j += UTF8BytesOfLead[ch];
 900                 } else if (dbcsCodePage) {
 901                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 902                 } else {
 903                         j++;
 904                 }
 905         }
 906         if (lastSpaceBreak >= 0) {
 907                 return lastSpaceBreak;
 908         } else if (lastPunctuationBreak >= 0) {
 909                 return lastPunctuationBreak;
 910         }
 911         return lastEncodingAllowedBreak;
 912 }
 913
 914 EncodingFamily Document::CodePageFamily() const {
 915         if (SC_CP_UTF8 == dbcsCodePage)
 916                 return efUnicode;
 917         else if (dbcsCodePage)
 918                 return efDBCS;
 919         else
 920                 return efEightBit;
 921 }
 922
 923 void Document::ModifiedAt(int pos) {
 924         if (endStyled > pos)
 925                 endStyled = pos;
 926 }
 927
 928 void Document::CheckReadOnly() {
 929         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 930                 enteredReadOnlyCount++;
 931                 NotifyModifyAttempt();
 932                 enteredReadOnlyCount--;
 933         }
 934 }
 935
 936 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 937 // SetStyleAt does not change the persistent state of a document
 938
 939 bool Document::DeleteChars(int pos, int len) {
 940         if (pos < 0)
 941                 return false;
 942         if (len <= 0)
 943                 return false;
 944         if ((pos + len) > Length())
 945                 return false;
 946         CheckReadOnly();
 947         if (enteredModification != 0) {
 948                 return false;
 949         } else {
 950                 enteredModification++;
 951                 if (!cb.IsReadOnly()) {
 952                         NotifyModified(
 953                             DocModification(
 954                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 955                                 pos, len,
 956                                 0, 0));
 957                         int prevLinesTotal = LinesTotal();
 958                         bool startSavePoint = cb.IsSavePoint();
 959                         bool startSequence = false;
 960                         const char *text = cb.DeleteChars(pos, len, startSequence);
 961                         if (startSavePoint && cb.IsCollectingUndo())
 962                                 NotifySavePoint(!startSavePoint);
 963                         if ((pos < Length()) || (pos == 0))
 964                                 ModifiedAt(pos);
 965                         else
 966                                 ModifiedAt(pos-1);
 967                         NotifyModified(
 968                             DocModification(
 969                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 970                                 pos, len,
 971                                 LinesTotal() - prevLinesTotal, text));
 972                 }
 973                 enteredModification--;
 974         }
 975         return !cb.IsReadOnly();
 976 }
 977
/**
 * Insert a string with a length.
 * Returns the number of bytes inserted. This is 0 when insertLength is not
 * positive, when the document is read-only after the modify attempt
 * notification, or when another modification is already in progress.
 * An SC_MOD_INSERTCHECK notification is sent first; if ChangeInsertion was
 * called during it (insertionSet), the replacement text and its length are
 * inserted and returned instead of the arguments.
 */
int Document::InsertString(int position, const char *s, int insertLength) {
        if (insertLength <= 0) {
                return 0;
        }
        CheckReadOnly();        // Application may change read only state here
        if (cb.IsReadOnly()) {
                return 0;
        }
        if (enteredModification != 0) {
                return 0;
        }
        enteredModification++;
        // Reset any replacement text left from an earlier insertion
        insertionSet = false;
        insertion.clear();
        NotifyModified(
                DocModification(
                        SC_MOD_INSERTCHECK,
                        position, insertLength,
                        0, s));
        if (insertionSet) {
                // A listener supplied different text to insert
                s = insertion.c_str();
                insertLength = static_cast<int>(insertion.length());
        }
        NotifyModified(
                DocModification(
                        SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
                        position, insertLength,
                        0, s));
        int prevLinesTotal = LinesTotal();
        bool startSavePoint = cb.IsSavePoint();
        bool startSequence = false;
        const char *text = cb.InsertString(position, s, insertLength, startSequence);
        // Was at the save point before this change and undo is being collected
        if (startSavePoint && cb.IsCollectingUndo())
                NotifySavePoint(!startSavePoint);
        ModifiedAt(position);
        NotifyModified(
                DocModification(
                        SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
                        position, insertLength,
                        LinesTotal() - prevLinesTotal, text));
        if (insertionSet) {     // Free memory as could be large
                std::string().swap(insertion);
        }
        enteredModification--;
        return insertLength;
}
1027
1028 void Document::ChangeInsertion(const char *s, int length) {
1029         insertionSet = true;
1030         insertion.assign(s, length);
1031 }
1032
1033 int SCI_METHOD Document::AddData(char *data, int length) {
1034         try {
1035                 int position = Length();
1036                 InsertString(position, data, length);
1037         } catch (std::bad_alloc &) {
1038                 return SC_STATUS_BADALLOC;
1039         } catch (...) {
1040                 return SC_STATUS_FAILURE;
1041         }
1042         return 0;
1043 }
1044
1045 void * SCI_METHOD Document::ConvertToDocument() {
1046         return this;
1047 }
1048
// Undo the most recent group of actions, sending a before notification and an
// after notification for each step.
// Returns -1 when no text action was undone. Otherwise the result is derived
// from the last text action: its position for an undone insertion, or the
// end of the restored text for an undone removal. For a run of adjoining
// removals that is the end of the whole restored run.
// Nothing happens if a modification is already in progress, undo is not being
// collected or the document is read-only.
int Document::Undo() {
        int newPos = -1;
        CheckReadOnly();
        if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        bool multiLine = false;
                        int steps = cb.StartUndo();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        // Track a run of removals that touch each other so that the
                        // returned position can be placed after all of the restored text
                        int coalescedRemovePos = -1;
                        int coalescedRemoveLen = 0;
                        int prevRemoveActionPos = -1;
                        int prevRemoveActionLen = 0;
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetUndoStep();
                                // Before notification: undoing a removal inserts text and
                                // undoing an insertion deletes text
                                if (action.at == removeAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
                                } else if (action.at == containerAction) {
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                        // A container action that may not coalesce ends any run of removals
                                        if (!action.mayCoalesce) {
                                                coalescedRemovePos = -1;
                                                coalescedRemoveLen = 0;
                                                prevRemoveActionPos = -1;
                                                prevRemoveActionLen = 0;
                                        }
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
                                }
                                cb.PerformUndoStep();
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                        newPos = action.position;
                                }

                                int modFlags = SC_PERFORMED_UNDO;
                                // With undo, a removal action becomes an insertion notification
                                // and an insertion action becomes a deletion notification
                                if (action.at == removeAction) {
                                        newPos += action.lenData;
                                        modFlags |= SC_MOD_INSERTTEXT;
                                        // Extend the current run when this removal starts at the same
                                        // position as the previous one or directly after it
                                        if ((coalescedRemoveLen > 0) &&
                                                (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
                                                coalescedRemoveLen += action.lenData;
                                                newPos = coalescedRemovePos + coalescedRemoveLen;
                                        } else {
                                                coalescedRemovePos = action.position;
                                                coalescedRemoveLen = action.lenData;
                                        }
                                        prevRemoveActionPos = action.position;
                                        prevRemoveActionLen = action.lenData;
                                } else if (action.at == insertAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                        coalescedRemovePos = -1;
                                        coalescedRemoveLen = 0;
                                        prevRemoveActionPos = -1;
                                        prevRemoveActionLen = 0;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step also reports whether any step changed the line count
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                                                                           linesAdded, action.data));
                        }

                        // Notify only when the save point state differs from before the undo
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);
                }
                enteredModification--;
        }
        return newPos;
}
1133
// Redo the next group of undone actions, sending a before notification and an
// after notification for each step.
// Returns -1 when no text action was redone. Otherwise the result comes from
// the last text action: the end of the inserted text for an insertion or the
// action position for a removal.
// Nothing happens if a modification is already in progress, undo is not being
// collected or the document is read-only.
int Document::Redo() {
        int newPos = -1;
        CheckReadOnly();
        if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        bool multiLine = false;
                        int steps = cb.StartRedo();
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetRedoStep();
                                // Before notification matching the kind of action being replayed
                                if (action.at == insertAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
                                } else if (action.at == containerAction) {
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
                                }
                                cb.PerformRedoStep();
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                        newPos = action.position;
                                }

                                int modFlags = SC_PERFORMED_REDO;
                                if (action.at == insertAction) {
                                        // Place the result after the text that was inserted again
                                        newPos += action.lenData;
                                        modFlags |= SC_MOD_INSERTTEXT;
                                } else if (action.at == removeAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step also reports whether any step changed the line count
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(
                                        DocModification(modFlags, action.position, action.lenData,
                                                                        linesAdded, action.data));
                        }

                        // Notify only when the save point state differs from before the redo
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);
                }
                enteredModification--;
        }
        return newPos;
}
1193
1194 void Document::DelChar(int pos) {
1195         DeleteChars(pos, LenChar(pos));
1196 }
1197
1198 void Document::DelCharBack(int pos) {
1199         if (pos <= 0) {
1200                 return;
1201         } else if (IsCrLf(pos - 2)) {
1202                 DeleteChars(pos - 2, 2);
1203         } else if (dbcsCodePage) {
1204                 int startChar = NextPosition(pos, -1);
1205                 DeleteChars(startChar, pos - startChar);
1206         } else {
1207                 DeleteChars(pos - 1, 1);
1208         }
1209 }
1210
// Return the first multiple of tabSize that is strictly beyond column pos.
static int NextTab(int pos, int tabSize) {
        const int intoTab = pos % tabSize;
        return pos - intoTab + tabSize;
}
1214
// Build the white space needed to reach column indent.
// When insertSpaces is false, as many tabs of width tabSize as fit are used
// and the remainder is made up with spaces; otherwise only spaces are used.
// A non-positive indent gives an empty string. A non-positive tabSize can not
// make progress with tabs, so spaces are used rather than looping forever.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
        std::string indentation;
        if (indent <= 0)
                return indentation;
        int spaces = indent;
        if (!insertSpaces && (tabSize > 0)) {
                indentation.append(static_cast<std::string::size_type>(indent / tabSize), '\t');
                spaces = indent % tabSize;
        }
        indentation.append(static_cast<std::string::size_type>(spaces), ' ');
        return indentation;
}
1229
1230 int SCI_METHOD Document::GetLineIndentation(int line) {
1231         int indent = 0;
1232         if ((line >= 0) && (line < LinesTotal())) {
1233                 int lineStart = LineStart(line);
1234                 int length = Length();
1235                 for (int i = lineStart; i < length; i++) {
1236                         char ch = cb.CharAt(i);
1237                         if (ch == ' ')
1238                                 indent++;
1239                         else if (ch == '\t')
1240                                 indent = NextTab(indent, tabInChars);
1241                         else
1242                                 return indent;
1243                 }
1244         }
1245         return indent;
1246 }
1247
1248 int Document::SetLineIndentation(int line, int indent) {
1249         int indentOfLine = GetLineIndentation(line);
1250         if (indent < 0)
1251                 indent = 0;
1252         if (indent != indentOfLine) {
1253                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1254                 int thisLineStart = LineStart(line);
1255                 int indentPos = GetLineIndentPosition(line);
1256                 UndoGroup ug(this);
1257                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1258                 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1259                         static_cast<int>(linebuf.length()));
1260         } else {
1261                 return GetLineIndentPosition(line);
1262         }
1263 }
1264
1265 int Document::GetLineIndentPosition(int line) const {
1266         if (line < 0)
1267                 return 0;
1268         int pos = LineStart(line);
1269         int length = Length();
1270         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1271                 pos++;
1272         }
1273         return pos;
1274 }
1275
1276 int Document::GetColumn(int pos) {
1277         int column = 0;
1278         int line = LineFromPosition(pos);
1279         if ((line >= 0) && (line < LinesTotal())) {
1280                 for (int i = LineStart(line); i < pos;) {
1281                         char ch = cb.CharAt(i);
1282                         if (ch == '\t') {
1283                                 column = NextTab(column, tabInChars);
1284                                 i++;
1285                         } else if (ch == '\r') {
1286                                 return column;
1287                         } else if (ch == '\n') {
1288                                 return column;
1289                         } else if (i >= Length()) {
1290                                 return column;
1291                         } else {
1292                                 column++;
1293                                 i = NextPosition(i, 1);
1294                         }
1295                 }
1296         }
1297         return column;
1298 }
1299
1300 int Document::CountCharacters(int startPos, int endPos) {
1301         startPos = MovePositionOutsideChar(startPos, 1, false);
1302         endPos = MovePositionOutsideChar(endPos, -1, false);
1303         int count = 0;
1304         int i = startPos;
1305         while (i < endPos) {
1306                 count++;
1307                 if (IsCrLf(i))
1308                         i++;
1309                 i = NextPosition(i, 1);
1310         }
1311         return count;
1312 }
1313
1314 int Document::FindColumn(int line, int column) {
1315         int position = LineStart(line);
1316         if ((line >= 0) && (line < LinesTotal())) {
1317                 int columnCurrent = 0;
1318                 while ((columnCurrent < column) && (position < Length())) {
1319                         char ch = cb.CharAt(position);
1320                         if (ch == '\t') {
1321                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1322                                 if (columnCurrent > column)
1323                                         return position;
1324                                 position++;
1325                         } else if (ch == '\r') {
1326                                 return position;
1327                         } else if (ch == '\n') {
1328                                 return position;
1329                         } else {
1330                                 columnCurrent++;
1331                                 position = NextPosition(position, 1);
1332                         }
1333                 }
1334         }
1335         return position;
1336 }
1337
1338 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1339         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1340         for (int line = lineBottom; line >= lineTop; line--) {
1341                 int indentOfLine = GetLineIndentation(line);
1342                 if (forwards) {
1343                         if (LineStart(line) < LineEnd(line)) {
1344                                 SetLineIndentation(line, indentOfLine + IndentSize());
1345                         }
1346                 } else {
1347                         SetLineIndentation(line, indentOfLine - IndentSize());
1348                 }
1349         }
1350 }
1351
1352 // Convert line endings for a piece of text to a particular mode.
1353 // Stop at len or when a NUL is found.
1354 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1355         std::string dest;
1356         for (size_t i = 0; (i < len) && (s[i]); i++) {
1357                 if (s[i] == '\n' || s[i] == '\r') {
1358                         if (eolModeWanted == SC_EOL_CR) {
1359                                 dest.push_back('\r');
1360                         } else if (eolModeWanted == SC_EOL_LF) {
1361                                 dest.push_back('\n');
1362                         } else { // eolModeWanted == SC_EOL_CRLF
1363                                 dest.push_back('\r');
1364                                 dest.push_back('\n');
1365                         }
1366                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1367                                 i++;
1368                         }
1369                 } else {
1370                         dest.push_back(s[i]);
1371                 }
1372         }
1373         return dest;
1374 }
1375
1376 void Document::ConvertLineEnds(int eolModeSet) {
1377         UndoGroup ug(this);
1378
1379         for (int pos = 0; pos < Length(); pos++) {
1380                 if (cb.CharAt(pos) == '\r') {
1381                         if (cb.CharAt(pos + 1) == '\n') {
1382                                 // CRLF
1383                                 if (eolModeSet == SC_EOL_CR) {
1384                                         DeleteChars(pos + 1, 1); // Delete the LF
1385                                 } else if (eolModeSet == SC_EOL_LF) {
1386                                         DeleteChars(pos, 1); // Delete the CR
1387                                 } else {
1388                                         pos++;
1389                                 }
1390                         } else {
1391                                 // CR
1392                                 if (eolModeSet == SC_EOL_CRLF) {
1393                                         pos += InsertString(pos + 1, "\n", 1); // Insert LF
1394                                 } else if (eolModeSet == SC_EOL_LF) {
1395                                         pos += InsertString(pos, "\n", 1); // Insert LF
1396                                         DeleteChars(pos, 1); // Delete CR
1397                                         pos--;
1398                                 }
1399                         }
1400                 } else if (cb.CharAt(pos) == '\n') {
1401                         // LF
1402                         if (eolModeSet == SC_EOL_CRLF) {
1403                                 pos += InsertString(pos, "\r", 1); // Insert CR
1404                         } else if (eolModeSet == SC_EOL_CR) {
1405                                 pos += InsertString(pos, "\r", 1); // Insert CR
1406                                 DeleteChars(pos, 1); // Delete LF
1407                                 pos--;
1408                         }
1409                 }
1410         }
1411
1412 }
1413
1414 bool Document::IsWhiteLine(int line) const {
1415         int currentChar = LineStart(line);
1416         int endLine = LineEnd(line);
1417         while (currentChar < endLine) {
1418                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1419                         return false;
1420                 }
1421                 ++currentChar;
1422         }
1423         return true;
1424 }
1425
1426 int Document::ParaUp(int pos) const {
1427         int line = LineFromPosition(pos);
1428         line--;
1429         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1430                 line--;
1431         }
1432         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1433                 line--;
1434         }
1435         line++;
1436         return LineStart(line);
1437 }
1438
1439 int Document::ParaDown(int pos) const {
1440         int line = LineFromPosition(pos);
1441         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1442                 line++;
1443         }
1444         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1445                 line++;
1446         }
1447         if (line < LinesTotal())
1448                 return LineStart(line);
1449         else // end of a document
1450                 return LineEnd(line-1);
1451 }
1452
1453 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1454         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1455                 return CharClassify::ccWord;
1456         return charClass.GetClass(ch);
1457 }
1458
1459 /**
1460  * Used by commmands that want to select whole words.
1461  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1462  */
1463 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1464         CharClassify::cc ccStart = CharClassify::ccWord;
1465         if (delta < 0) {
1466                 if (!onlyWordCharacters)
1467                         ccStart = WordCharClass(cb.CharAt(pos-1));
1468                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1469                         pos--;
1470         } else {
1471                 if (!onlyWordCharacters && pos < Length())
1472                         ccStart = WordCharClass(cb.CharAt(pos));
1473                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1474                         pos++;
1475         }
1476         return MovePositionOutsideChar(pos, delta, true);
1477 }
1478
1479 /**
1480  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1481  * (delta < 0).
1482  * This is looking for a transition between character classes although there is also some
1483  * additional movement to transit white space.
1484  * Used by cursor movement by word commands.
1485  */
1486 int Document::NextWordStart(int pos, int delta) {
1487         if (delta < 0) {
1488                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1489                         pos--;
1490                 if (pos > 0) {
1491                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1492                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1493                                 pos--;
1494                         }
1495                 }
1496         } else {
1497                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1498                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1499                         pos++;
1500                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1501                         pos++;
1502         }
1503         return pos;
1504 }
1505
1506 /**
1507  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1508  * (delta < 0).
1509  * This is looking for a transition between character classes although there is also some
1510  * additional movement to transit white space.
1511  * Used by cursor movement by word commands.
1512  */
1513 int Document::NextWordEnd(int pos, int delta) {
1514         if (delta < 0) {
1515                 if (pos > 0) {
1516                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1517                         if (ccStart != CharClassify::ccSpace) {
1518                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1519                                         pos--;
1520                                 }
1521                         }
1522                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1523                                 pos--;
1524                         }
1525                 }
1526         } else {
1527                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1528                         pos++;
1529                 }
1530                 if (pos < Length()) {
1531                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1532                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1533                                 pos++;
1534                         }
1535                 }
1536         }
1537         return pos;
1538 }
1539
1540 /**
1541  * Check that the character at the given position is a word or punctuation character and that
1542  * the previous character is of a different character class.
1543  */
1544 bool Document::IsWordStartAt(int pos) const {
1545         if (pos > 0) {
1546                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1547                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1548                         (ccPos != WordCharClass(CharAt(pos - 1)));
1549         }
1550         return true;
1551 }
1552
1553 /**
1554  * Check that the character at the given position is a word or punctuation character and that
1555  * the next character is of a different character class.
1556  */
1557 bool Document::IsWordEndAt(int pos) const {
1558         if (pos < Length()) {
1559                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1560                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1561                         (ccPrev != WordCharClass(CharAt(pos)));
1562         }
1563         return true;
1564 }
1565
1566 /**
1567  * Check that the given range is has transitions between character classes at both
1568  * ends and where the characters on the inside are word or punctuation characters.
1569  */
1570 bool Document::IsWordAt(int start, int end) const {
1571         return IsWordStartAt(start) && IsWordEndAt(end);
1572 }
1573
1574 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1575         return (!word && !wordStart) ||
1576                         (word && IsWordAt(pos, pos + length)) ||
1577                         (wordStart && IsWordStartAt(pos));
1578 }
1579
1580 bool Document::HasCaseFolder(void) const {
1581         return pcf != 0;
1582 }
1583
1584 void Document::SetCaseFolder(CaseFolder *pcf_) {
1585         delete pcf;
1586         pcf = pcf_;
1587 }
1588
1589 /**
1590  * Find text in document, supporting both forward and backward
1591  * searches (just pass minPos > maxPos to do a backward search)
1592  * Has not been tested with backwards DBCS searches yet.
1593  */
1594 long Document::FindText(int minPos, int maxPos, const char *search,
1595                         bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1596                         int *length) {
1597         if (*length <= 0)
1598                 return minPos;
1599         if (regExp) {
1600                 if (!regex)
1601                         regex = CreateRegexSearch(&charClass);
1602                 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1603         } else {
1604
1605                 const bool forward = minPos <= maxPos;
1606                 const int increment = forward ? 1 : -1;
1607
1608                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1609                 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1610                 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1611
1612                 // Compute actual search ranges needed
1613                 const int lengthFind = *length;
1614
1615                 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1616                 const int limitPos = Platform::Maximum(startPos, endPos);
1617                 int pos = startPos;
1618                 if (!forward) {
1619                         // Back all of a character
1620                         pos = NextPosition(pos, increment);
1621                 }
1622                 if (caseSensitive) {
1623                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1624                         const char charStartSearch =  search[0];
1625                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1626                                 if (CharAt(pos) == charStartSearch) {
1627                                         bool found = (pos + lengthFind) <= limitPos;
1628                                         for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1629                                                 found = CharAt(pos + indexSearch) == search[indexSearch];
1630                                         }
1631                                         if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1632                                                 return pos;
1633                                         }
1634                                 }
1635                                 if (!NextCharacter(pos, increment))
1636                                         break;
1637                         }
1638                 } else if (SC_CP_UTF8 == dbcsCodePage) {
1639                         const size_t maxFoldingExpansion = 4;
1640                         std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1641                         const int lenSearch = static_cast<int>(
1642                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1643                         char bytes[UTF8MaxBytes + 1];
1644                         char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1645                         while (forward ? (pos < endPos) : (pos >= endPos)) {
1646                                 int widthFirstCharacter = 0;
1647                                 int posIndexDocument = pos;
1648                                 int indexSearch = 0;
1649                                 bool characterMatches = true;
1650                                 for (;;) {
1651                                         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1652                                         bytes[0] = leadByte;
1653                                         int widthChar = 1;
1654                                         if (!UTF8IsAscii(leadByte)) {
1655                                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1656                                                 for (int b=1; b<widthCharBytes; b++) {
1657                                                         bytes[b] = cb.CharAt(posIndexDocument+b);
1658                                                 }
1659                                                 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1660                                         }
1661                                         if (!widthFirstCharacter)
1662                                                 widthFirstCharacter = widthChar;
1663                                         if ((posIndexDocument + widthChar) > limitPos)
1664                                                 break;
1665                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1666                                         folded[lenFlat] = 0;
1667                                         // Does folded match the buffer
1668                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1669                                         if (!characterMatches)
1670                                                 break;
1671                                         posIndexDocument += widthChar;
1672                                         indexSearch += lenFlat;
1673                                         if (indexSearch >= lenSearch)
1674                                                 break;
1675                                 }
1676                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1677                                         if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1678                                                 *length = posIndexDocument - pos;
1679                                                 return pos;
1680                                         }
1681                                 }
1682                                 if (forward) {
1683                                         pos += widthFirstCharacter;
1684                                 } else {
1685                                         if (!NextCharacter(pos, increment))
1686                                                 break;
1687                                 }
1688                         }
1689                 } else if (dbcsCodePage) {
1690                         const size_t maxBytesCharacter = 2;
1691                         const size_t maxFoldingExpansion = 4;
1692                         std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1693                         const int lenSearch = static_cast<int>(
1694                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1695                         while (forward ? (pos < endPos) : (pos >= endPos)) {
1696                                 int indexDocument = 0;
1697                                 int indexSearch = 0;
1698                                 bool characterMatches = true;
1699                                 while (characterMatches &&
1700                                         ((pos + indexDocument) < limitPos) &&
1701                                         (indexSearch < lenSearch)) {
1702                                         char bytes[maxBytesCharacter + 1];
1703                                         bytes[0] = cb.CharAt(pos + indexDocument);
1704                                         const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1705                                         if (widthChar == 2)
1706                                                 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1707                                         if ((pos + indexDocument + widthChar) > limitPos)
1708                                                 break;
1709                                         char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1710                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1711                                         folded[lenFlat] = 0;
1712                                         // Does folded match the buffer
1713                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1714                                         indexDocument += widthChar;
1715                                         indexSearch += lenFlat;
1716                                 }
1717                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1718                                         if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1719                                                 *length = indexDocument;
1720                                                 return pos;
1721                                         }
1722                                 }
1723                                 if (!NextCharacter(pos, increment))
1724                                         break;
1725                         }
1726                 } else {
1727                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1728                         std::vector<char> searchThing(lengthFind + 1);
1729                         pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1730                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1731                                 bool found = (pos + lengthFind) <= limitPos;
1732                                 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1733                                         char ch = CharAt(pos + indexSearch);
1734                                         char folded[2];
1735                                         pcf->Fold(folded, sizeof(folded), &ch, 1);
1736                                         found = folded[0] == searchThing[indexSearch];
1737                                 }
1738                                 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1739                                         return pos;
1740                                 }
1741                                 if (!NextCharacter(pos, increment))
1742                                         break;
1743                         }
1744                 }
1745         }
1746         //Platform::DebugPrintf("Not found\n");
1747         return -1;
1748 }
1749
1750 const char *Document::SubstituteByPosition(const char *text, int *length) {
1751         if (regex)
1752                 return regex->SubstituteByPosition(this, text, length);
1753         else
1754                 return 0;
1755 }
1756
1757 int Document::LinesTotal() const {
1758         return cb.Lines();
1759 }
1760
1761 void Document::SetDefaultCharClasses(bool includeWordClass) {
1762     charClass.SetDefaultCharClasses(includeWordClass);
1763 }
1764
1765 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1766     charClass.SetCharClasses(chars, newCharClass);
1767 }
1768
1769 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1770     return charClass.GetCharsOfClass(characterClass, buffer);
1771 }
1772
1773 void SCI_METHOD Document::StartStyling(int position, char) {
1774         endStyled = position;
1775 }
1776
1777 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1778         if (enteredStyling != 0) {
1779                 return false;
1780         } else {
1781                 enteredStyling++;
1782                 int prevEndStyled = endStyled;
1783                 if (cb.SetStyleFor(endStyled, length, style)) {
1784                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1785                                            prevEndStyled, length);
1786                         NotifyModified(mh);
1787                 }
1788                 endStyled += length;
1789                 enteredStyling--;
1790                 return true;
1791         }
1792 }
1793
1794 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1795         if (enteredStyling != 0) {
1796                 return false;
1797         } else {
1798                 enteredStyling++;
1799                 bool didChange = false;
1800                 int startMod = 0;
1801                 int endMod = 0;
1802                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1803                         PLATFORM_ASSERT(endStyled < Length());
1804                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
1805                                 if (!didChange) {
1806                                         startMod = endStyled;
1807                                 }
1808                                 didChange = true;
1809                                 endMod = endStyled;
1810                         }
1811                 }
1812                 if (didChange) {
1813                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1814                                            startMod, endMod - startMod + 1);
1815                         NotifyModified(mh);
1816                 }
1817                 enteredStyling--;
1818                 return true;
1819         }
1820 }
1821
1822 void Document::EnsureStyledTo(int pos) {
1823         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1824                 IncrementStyleClock();
1825                 if (pli && !pli->UseContainerLexing()) {
1826                         int lineEndStyled = LineFromPosition(GetEndStyled());
1827                         int endStyledTo = LineStart(lineEndStyled);
1828                         pli->Colourise(endStyledTo, pos);
1829                 } else {
1830                         // Ask the watchers to style, and stop as soon as one responds.
1831                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1832                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1833                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1834                         }
1835                 }
1836         }
1837 }
1838
1839 void Document::LexerChanged() {
1840         // Tell the watchers the lexer has changed.
1841         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1842                 it->watcher->NotifyLexerChanged(this, it->userData);
1843         }
1844 }
1845
1846 int SCI_METHOD Document::SetLineState(int line, int state) {
1847         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1848         if (state != statePrevious) {
1849                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1850                 NotifyModified(mh);
1851         }
1852         return statePrevious;
1853 }
1854
1855 int SCI_METHOD Document::GetLineState(int line) const {
1856         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1857 }
1858
1859 int Document::GetMaxLineState() {
1860         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1861 }
1862
1863 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1864         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1865         NotifyModified(mh);
1866 }
1867
1868 StyledText Document::MarginStyledText(int line) const {
1869         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1870         return StyledText(pla->Length(line), pla->Text(line),
1871                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1872 }
1873
1874 void Document::MarginSetText(int line, const char *text) {
1875         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1876         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1877         NotifyModified(mh);
1878 }
1879
1880 void Document::MarginSetStyle(int line, int style) {
1881         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1882         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1883 }
1884
1885 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1886         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1887         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1888 }
1889
1890 void Document::MarginClearAll() {
1891         int maxEditorLine = LinesTotal();
1892         for (int l=0; l<maxEditorLine; l++)
1893                 MarginSetText(l, 0);
1894         // Free remaining data
1895         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1896 }
1897
1898 StyledText Document::AnnotationStyledText(int line) const {
1899         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1900         return StyledText(pla->Length(line), pla->Text(line),
1901                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1902 }
1903
1904 void Document::AnnotationSetText(int line, const char *text) {
1905         if (line >= 0 && line < LinesTotal()) {
1906                 const int linesBefore = AnnotationLines(line);
1907                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1908                 const int linesAfter = AnnotationLines(line);
1909                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1910                 mh.annotationLinesAdded = linesAfter - linesBefore;
1911                 NotifyModified(mh);
1912         }
1913 }
1914
1915 void Document::AnnotationSetStyle(int line, int style) {
1916         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1917         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1918         NotifyModified(mh);
1919 }
1920
1921 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1922         if (line >= 0 && line < LinesTotal()) {
1923                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1924         }
1925 }
1926
1927 int Document::AnnotationLines(int line) const {
1928         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1929 }
1930
1931 void Document::AnnotationClearAll() {
1932         int maxEditorLine = LinesTotal();
1933         for (int l=0; l<maxEditorLine; l++)
1934                 AnnotationSetText(l, 0);
1935         // Free remaining data
1936         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1937 }
1938
1939 void Document::IncrementStyleClock() {
1940         styleClock = (styleClock + 1) % 0x100000;
1941 }
1942
1943 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1944         if (decorations.FillRange(position, value, fillLength)) {
1945                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1946                                                         position, fillLength);
1947                 NotifyModified(mh);
1948         }
1949 }
1950
1951 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1952         WatcherWithUserData wwud(watcher, userData);
1953         std::vector<WatcherWithUserData>::iterator it =
1954                 std::find(watchers.begin(), watchers.end(), wwud);
1955         if (it != watchers.end())
1956                 return false;
1957         watchers.push_back(wwud);
1958         return true;
1959 }
1960
1961 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1962         std::vector<WatcherWithUserData>::iterator it =
1963                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
1964         if (it != watchers.end()) {
1965                 watchers.erase(it);
1966                 return true;
1967         }
1968         return false;
1969 }
1970
1971 void Document::NotifyModifyAttempt() {
1972         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1973                 it->watcher->NotifyModifyAttempt(this, it->userData);
1974         }
1975 }
1976
1977 void Document::NotifySavePoint(bool atSavePoint) {
1978         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1979                 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
1980         }
1981 }
1982
1983 void Document::NotifyModified(DocModification mh) {
1984         if (mh.modificationType & SC_MOD_INSERTTEXT) {
1985                 decorations.InsertSpace(mh.position, mh.length);
1986         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1987                 decorations.DeleteRange(mh.position, mh.length);
1988         }
1989         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1990                 it->watcher->NotifyModified(this, mh, it->userData);
1991         }
1992 }
1993
1994 bool Document::IsWordPartSeparator(char ch) const {
1995         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1996 }
1997
1998 int Document::WordPartLeft(int pos) {
1999         if (pos > 0) {
2000                 --pos;
2001                 char startChar = cb.CharAt(pos);
2002                 if (IsWordPartSeparator(startChar)) {
2003                         while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2004                                 --pos;
2005                         }
2006                 }
2007                 if (pos > 0) {
2008                         startChar = cb.CharAt(pos);
2009                         --pos;
2010                         if (IsLowerCase(startChar)) {
2011                                 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2012                                         --pos;
2013                                 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2014                                         ++pos;
2015                         } else if (IsUpperCase(startChar)) {
2016                                 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2017                                         --pos;
2018                                 if (!IsUpperCase(cb.CharAt(pos)))
2019                                         ++pos;
2020                         } else if (IsADigit(startChar)) {
2021                                 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2022                                         --pos;
2023                                 if (!IsADigit(cb.CharAt(pos)))
2024                                         ++pos;
2025                         } else if (IsPunctuation(startChar)) {
2026                                 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2027                                         --pos;
2028                                 if (!IsPunctuation(cb.CharAt(pos)))
2029                                         ++pos;
2030                         } else if (isspacechar(startChar)) {
2031                                 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2032                                         --pos;
2033                                 if (!isspacechar(cb.CharAt(pos)))
2034                                         ++pos;
2035                         } else if (!IsASCII(startChar)) {
2036                                 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2037                                         --pos;
2038                                 if (IsASCII(cb.CharAt(pos)))
2039                                         ++pos;
2040                         } else {
2041                                 ++pos;
2042                         }
2043                 }
2044         }
2045         return pos;
2046 }
2047
2048 int Document::WordPartRight(int pos) {
2049         char startChar = cb.CharAt(pos);
2050         int length = Length();
2051         if (IsWordPartSeparator(startChar)) {
2052                 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2053                         ++pos;
2054                 startChar = cb.CharAt(pos);
2055         }
2056         if (!IsASCII(startChar)) {
2057                 while (pos < length && !IsASCII(cb.CharAt(pos)))
2058                         ++pos;
2059         } else if (IsLowerCase(startChar)) {
2060                 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2061                         ++pos;
2062         } else if (IsUpperCase(startChar)) {
2063                 if (IsLowerCase(cb.CharAt(pos + 1))) {
2064                         ++pos;
2065                         while (pos < length && IsLowerCase(cb.CharAt(pos)))
2066                                 ++pos;
2067                 } else {
2068                         while (pos < length && IsUpperCase(cb.CharAt(pos)))
2069                                 ++pos;
2070                 }
2071                 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2072                         --pos;
2073         } else if (IsADigit(startChar)) {
2074                 while (pos < length && IsADigit(cb.CharAt(pos)))
2075                         ++pos;
2076         } else if (IsPunctuation(startChar)) {
2077                 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2078                         ++pos;
2079         } else if (isspacechar(startChar)) {
2080                 while (pos < length && isspacechar(cb.CharAt(pos)))
2081                         ++pos;
2082         } else {
2083                 ++pos;
2084         }
2085         return pos;
2086 }
2087
/// True for the two line end characters: line feed and carriage return.
bool IsLineEndChar(char c) {
        switch (c) {
        case '\n':
        case '\r':
                return true;
        default:
                return false;
        }
}
2091
2092 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2093         int sStart = cb.StyleAt(pos);
2094         if (delta < 0) {
2095                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2096                         pos--;
2097                 pos++;
2098         } else {
2099                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2100                         pos++;
2101         }
2102         return pos;
2103 }
2104
/// Return the partner of a round, square, curly or angle bracket, or '\0' for any other character.
static char BraceOpposite(char ch) {
        // Partners sit side by side: an even index holds the opener, the next odd index its closer.
        static const char bracePairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
        const int pairChars = static_cast<int>(sizeof(bracePairs) / sizeof(bracePairs[0]));
        for (int i = 0; i < pairChars; i++) {
                if (bracePairs[i] == ch)
                        return bracePairs[i ^ 1];       // Flip the low bit to reach the partner
        }
        return '\0';
}
2127
2128 // TODO: should be able to extend styled region to find matching brace
2129 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2130         char chBrace = CharAt(position);
2131         char chSeek = BraceOpposite(chBrace);
2132         if (chSeek == '\0')
2133                 return - 1;
2134         char styBrace = static_cast<char>(StyleAt(position));
2135         int direction = -1;
2136         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2137                 direction = 1;
2138         int depth = 1;
2139         position = NextPosition(position, direction);
2140         while ((position >= 0) && (position < Length())) {
2141                 char chAtPos = CharAt(position);
2142                 char styAtPos = static_cast<char>(StyleAt(position));
2143                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2144                         if (chAtPos == chBrace)
2145                                 depth++;
2146                         if (chAtPos == chSeek)
2147                                 depth--;
2148                         if (depth == 0)
2149                                 return position;
2150                 }
2151                 int positionBeforeMove = position;
2152                 position = NextPosition(position, direction);
2153                 if (position == positionBeforeMove)
2154                         break;
2155         }
2156         return - 1;
2157 }
2158
2159 /**
2160  * Implementation of RegexSearchBase for the default built-in regular expression engine
2161  */
2162 class BuiltinRegex : public RegexSearchBase {
2163 public:
2164         explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2165
2166         virtual ~BuiltinRegex() {
2167         }
2168
2169         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2170                         bool caseSensitive, bool word, bool wordStart, int flags,
2171                         int *length);
2172
2173         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2174
2175 private:
2176         RESearch search;
2177         std::string substituted;
2178 };
2179
2180 // Define a way for the Regular Expression code to access the document
2181 class DocumentIndexer : public CharacterIndexer {
2182         Document *pdoc;
2183         int end;
2184 public:
2185         DocumentIndexer(Document *pdoc_, int end_) :
2186                 pdoc(pdoc_), end(end_) {
2187         }
2188
2189         virtual ~DocumentIndexer() {
2190         }
2191
2192         virtual char CharAt(int index) {
2193                 if (index < 0 || index >= end)
2194                         return 0;
2195                 else
2196                         return pdoc->CharAt(index);
2197         }
2198 };
2199
2200 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2201                         bool caseSensitive, bool, bool, int flags,
2202                         int *length) {
2203         const bool posix = (flags & SCFIND_POSIX) != 0;
2204         const int increment = (minPos <= maxPos) ? 1 : -1;
2205
2206         int startPos = minPos;
2207         int endPos = maxPos;
2208
2209         // Range endpoints should not be inside DBCS characters, but just in case, move them.
2210         startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2211         endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2212
2213         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2214         if (errmsg) {
2215                 return -1;
2216         }
2217         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2218         // Replace first '.' with '-' in each property file variable reference:
2219         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2220         //     Replace: $(\1-\2)
2221         int lineRangeStart = doc->LineFromPosition(startPos);
2222         const int lineRangeEnd = doc->LineFromPosition(endPos);
2223         if ((increment == 1) &&
2224                 (startPos >= doc->LineEnd(lineRangeStart)) &&
2225                 (lineRangeStart < lineRangeEnd)) {
2226                 // the start position is at end of line or between line end characters.
2227                 lineRangeStart++;
2228                 startPos = doc->LineStart(lineRangeStart);
2229         } else if ((increment == -1) &&
2230                    (startPos <= doc->LineStart(lineRangeStart)) &&
2231                    (lineRangeStart > lineRangeEnd)) {
2232                 // the start position is at beginning of line.
2233                 lineRangeStart--;
2234                 startPos = doc->LineEnd(lineRangeStart);
2235         }
2236         int pos = -1;
2237         int lenRet = 0;
2238         const char searchEnd = s[*length - 1];
2239         const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2240         const int lineRangeBreak = lineRangeEnd + increment;
2241         for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2242                 int startOfLine = doc->LineStart(line);
2243                 int endOfLine = doc->LineEnd(line);
2244                 if (increment == 1) {
2245                         if (line == lineRangeStart) {
2246                                 if ((startPos != startOfLine) && (s[0] == '^'))
2247                                         continue;       // Can't match start of line if start position after start of line
2248                                 startOfLine = startPos;
2249                         }
2250                         if (line == lineRangeEnd) {
2251                                 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2252                                         continue;       // Can't match end of line if end position before end of line
2253                                 endOfLine = endPos;
2254                         }
2255                 } else {
2256                         if (line == lineRangeEnd) {
2257                                 if ((endPos != startOfLine) && (s[0] == '^'))
2258                                         continue;       // Can't match start of line if end position after start of line
2259                                 startOfLine = endPos;
2260                         }
2261                         if (line == lineRangeStart) {
2262                                 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2263                                         continue;       // Can't match end of line if start position before end of line
2264                                 endOfLine = startPos;
2265                         }
2266                 }
2267
2268                 DocumentIndexer di(doc, endOfLine);
2269                 int success = search.Execute(di, startOfLine, endOfLine);
2270                 if (success) {
2271                         pos = search.bopat[0];
2272                         // Ensure only whole characters selected
2273                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2274                         lenRet = search.eopat[0] - search.bopat[0];
2275                         // There can be only one start of a line, so no need to look for last match in line
2276                         if ((increment == -1) && (s[0] != '^')) {
2277                                 // Check for the last match on this line.
2278                                 int repetitions = 1000; // Break out of infinite loop
2279                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2280                                         success = search.Execute(di, pos+1, endOfLine);
2281                                         if (success) {
2282                                                 if (search.eopat[0] <= minPos) {
2283                                                         pos = search.bopat[0];
2284                                                         lenRet = search.eopat[0] - search.bopat[0];
2285                                                 } else {
2286                                                         success = 0;
2287                                                 }
2288                                         }
2289                                 }
2290                         }
2291                         break;
2292                 }
2293         }
2294         *length = lenRet;
2295         return pos;
2296 }
2297
2298 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2299         substituted.clear();
2300         DocumentIndexer di(doc, doc->Length());
2301         search.GrabMatches(di);
2302         for (int j = 0; j < *length; j++) {
2303                 if (text[j] == '\\') {
2304                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2305                                 unsigned int patNum = text[j + 1] - '0';
2306                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2307                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
2308                                         substituted.append(search.pat[patNum].c_str(), len);
2309                                 j++;
2310                         } else {
2311                                 j++;
2312                                 switch (text[j]) {
2313                                 case 'a':
2314                                         substituted.push_back('\a');
2315                                         break;
2316                                 case 'b':
2317                                         substituted.push_back('\b');
2318                                         break;
2319                                 case 'f':
2320                                         substituted.push_back('\f');
2321                                         break;
2322                                 case 'n':
2323                                         substituted.push_back('\n');
2324                                         break;
2325                                 case 'r':
2326                                         substituted.push_back('\r');
2327                                         break;
2328                                 case 't':
2329                                         substituted.push_back('\t');
2330                                         break;
2331                                 case 'v':
2332                                         substituted.push_back('\v');
2333                                         break;
2334                                 case '\\':
2335                                         substituted.push_back('\\');
2336                                         break;
2337                                 default:
2338                                         substituted.push_back('\\');
2339                                         j--;
2340                                 }
2341                         }
2342                 } else {
2343                         substituted.push_back(text[j]);
2344                 }
2345         }
2346         *length = static_cast<int>(substituted.length());
2347         return substituted.c_str();
2348 }
2349
2350 #ifndef SCI_OWNREGEX
2351
2352 #ifdef SCI_NAMESPACE
2353
2354 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2355         return new BuiltinRegex(charClassTable);
2356 }
2357
2358 #else
2359
2360 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2361         return new BuiltinRegex(charClassTable);
2362 }
2363
2364 #endif
2365
2366 #endif