ext/scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <assert.h>
  12 #include <ctype.h>
  13
  14 #include <stdexcept>
  15 #include <string>
  16 #include <vector>
  17 #include <algorithm>
  18
  19 #ifdef CXX11_REGEX
  20 #include <regex>
  21 #endif
  22
  23 #include "Platform.h"
  24
  25 #include "ILexer.h"
  26 #include "Scintilla.h"
  27
  28 #include "CharacterSet.h"
  29 #include "SplitVector.h"
  30 #include "Partitioning.h"
  31 #include "RunStyles.h"
  32 #include "CellBuffer.h"
  33 #include "PerLine.h"
  34 #include "CharClassify.h"
  35 #include "Decoration.h"
  36 #include "CaseFolder.h"
  37 #include "Document.h"
  38 #include "RESearch.h"
  39 #include "UniConversion.h"
  40 #include "UnicodeFromUTF8.h"
  41
  42 #ifdef SCI_NAMESPACE
  43 using namespace Scintilla;
  44 #endif
  45
  46 static inline bool IsPunctuation(char ch) {
  47         return IsASCII(ch) && ispunct(ch);
  48 }
  49
  50 void LexInterface::Colourise(int start, int end) {
  51         if (pdoc && instance && !performingStyle) {
  52                 // Protect against reentrance, which may occur, for example, when
  53                 // fold points are discovered while performing styling and the folding
  54                 // code looks for child lines which may trigger styling.
  55                 performingStyle = true;
  56
  57                 int lengthDoc = pdoc->Length();
  58                 if (end == -1)
  59                         end = lengthDoc;
  60                 int len = end - start;
  61
  62                 PLATFORM_ASSERT(len >= 0);
  63                 PLATFORM_ASSERT(start + len <= lengthDoc);
  64
  65                 int styleStart = 0;
  66                 if (start > 0)
  67                         styleStart = pdoc->StyleAt(start - 1);
  68
  69                 if (len > 0) {
  70                         instance->Lex(start, len, styleStart, pdoc);
  71                         instance->Fold(start, len, styleStart, pdoc);
  72                 }
  73
  74                 performingStyle = false;
  75         }
  76 }
  77
  78 int LexInterface::LineEndTypesSupported() {
  79         if (instance) {
  80                 int interfaceVersion = instance->Version();
  81                 if (interfaceVersion >= lvSubStyles) {
  82                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  83                         return ssinstance->LineEndTypesSupported();
  84                 }
  85         }
  86         return 0;
  87 }
  88
  89 Document::Document() {
  90         refCount = 0;
  91         pcf = NULL;
  92 #ifdef _WIN32
  93         eolMode = SC_EOL_CRLF;
  94 #else
  95         eolMode = SC_EOL_LF;
  96 #endif
  97         dbcsCodePage = 0;
  98         lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
  99         endStyled = 0;
 100         styleClock = 0;
 101         enteredModification = 0;
 102         enteredStyling = 0;
 103         enteredReadOnlyCount = 0;
 104         insertionSet = false;
 105         tabInChars = 8;
 106         indentInChars = 0;
 107         actualIndentInChars = 8;
 108         useTabs = true;
 109         tabIndents = true;
 110         backspaceUnindents = false;
 111
 112         matchesValid = false;
 113         regex = 0;
 114
 115         UTF8BytesOfLeadInitialise();
 116
 117         perLineData[ldMarkers] = new LineMarkers();
 118         perLineData[ldLevels] = new LineLevels();
 119         perLineData[ldState] = new LineState();
 120         perLineData[ldMargin] = new LineAnnotation();
 121         perLineData[ldAnnotation] = new LineAnnotation();
 122
 123         cb.SetPerLine(this);
 124
 125         pli = 0;
 126 }
 127
 128 Document::~Document() {
 129         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 130                 it->watcher->NotifyDeleted(this, it->userData);
 131         }
 132         for (int j=0; j<ldSize; j++) {
 133                 delete perLineData[j];
 134                 perLineData[j] = 0;
 135         }
 136         delete regex;
 137         regex = 0;
 138         delete pli;
 139         pli = 0;
 140         delete pcf;
 141         pcf = 0;
 142 }
 143
 144 void Document::Init() {
 145         for (int j=0; j<ldSize; j++) {
 146                 if (perLineData[j])
 147                         perLineData[j]->Init();
 148         }
 149 }
 150
 151 int Document::LineEndTypesSupported() const {
 152         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 153                 return pli->LineEndTypesSupported();
 154         else
 155                 return 0;
 156 }
 157
 158 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 159         if (dbcsCodePage != dbcsCodePage_) {
 160                 dbcsCodePage = dbcsCodePage_;
 161                 SetCaseFolder(NULL);
 162                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 163                 return true;
 164         } else {
 165                 return false;
 166         }
 167 }
 168
 169 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 170         if (lineEndBitSet != lineEndBitSet_) {
 171                 lineEndBitSet = lineEndBitSet_;
 172                 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 173                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 174                         ModifiedAt(0);
 175                         cb.SetLineEndTypes(lineEndBitSetActive);
 176                         return true;
 177                 } else {
 178                         return false;
 179                 }
 180         } else {
 181                 return false;
 182         }
 183 }
 184
 185 void Document::InsertLine(int line) {
 186         for (int j=0; j<ldSize; j++) {
 187                 if (perLineData[j])
 188                         perLineData[j]->InsertLine(line);
 189         }
 190 }
 191
 192 void Document::RemoveLine(int line) {
 193         for (int j=0; j<ldSize; j++) {
 194                 if (perLineData[j])
 195                         perLineData[j]->RemoveLine(line);
 196         }
 197 }
 198
 199 // Increase reference count and return its previous value.
 200 int Document::AddRef() {
 201         return refCount++;
 202 }
 203
 204 // Decrease reference count and return its previous value.
 205 // Delete the document if reference count reaches zero.
 206 int SCI_METHOD Document::Release() {
 207         int curRefCount = --refCount;
 208         if (curRefCount == 0)
 209                 delete this;
 210         return curRefCount;
 211 }
 212
 213 void Document::SetSavePoint() {
 214         cb.SetSavePoint();
 215         NotifySavePoint(true);
 216 }
 217
 218 void Document::TentativeUndo() {
 219         CheckReadOnly();
 220         if (enteredModification == 0) {
 221                 enteredModification++;
 222                 if (!cb.IsReadOnly()) {
 223                         bool startSavePoint = cb.IsSavePoint();
 224                         bool multiLine = false;
 225                         int steps = cb.TentativeSteps();
 226                         //Platform::DebugPrintf("Steps=%d\n", steps);
 227                         for (int step = 0; step < steps; step++) {
 228                                 const int prevLinesTotal = LinesTotal();
 229                                 const Action &action = cb.GetUndoStep();
 230                                 if (action.at == removeAction) {
 231                                         NotifyModified(DocModification(
 232                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
 233                                 } else if (action.at == containerAction) {
 234                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
 235                                         dm.token = action.position;
 236                                         NotifyModified(dm);
 237                                 } else {
 238                                         NotifyModified(DocModification(
 239                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
 240                                 }
 241                                 cb.PerformUndoStep();
 242                                 if (action.at != containerAction) {
 243                                         ModifiedAt(action.position);
 244                                 }
 245
 246                                 int modFlags = SC_PERFORMED_UNDO;
 247                                 // With undo, an insertion action becomes a deletion notification
 248                                 if (action.at == removeAction) {
 249                                         modFlags |= SC_MOD_INSERTTEXT;
 250                                 } else if (action.at == insertAction) {
 251                                         modFlags |= SC_MOD_DELETETEXT;
 252                                 }
 253                                 if (steps > 1)
 254                                         modFlags |= SC_MULTISTEPUNDOREDO;
 255                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 256                                 if (linesAdded != 0)
 257                                         multiLine = true;
 258                                 if (step == steps - 1) {
 259                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 260                                         if (multiLine)
 261                                                 modFlags |= SC_MULTILINEUNDOREDO;
 262                                 }
 263                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
 264                                                                                            linesAdded, action.data));
 265                         }
 266
 267                         bool endSavePoint = cb.IsSavePoint();
 268                         if (startSavePoint != endSavePoint)
 269                                 NotifySavePoint(endSavePoint);
 270
 271                         cb.TentativeCommit();
 272                 }
 273                 enteredModification--;
 274         }
 275 }
 276
 277 int Document::GetMark(int line) {
 278         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 279 }
 280
 281 int Document::MarkerNext(int lineStart, int mask) const {
 282         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 283 }
 284
 285 int Document::AddMark(int line, int markerNum) {
 286         if (line >= 0 && line <= LinesTotal()) {
 287                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 288                         AddMark(line, markerNum, LinesTotal());
 289                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 290                 NotifyModified(mh);
 291                 return prev;
 292         } else {
 293                 return 0;
 294         }
 295 }
 296
 297 void Document::AddMarkSet(int line, int valueSet) {
 298         if (line < 0 || line > LinesTotal()) {
 299                 return;
 300         }
 301         unsigned int m = valueSet;
 302         for (int i = 0; m; i++, m >>= 1)
 303                 if (m & 1)
 304                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 305                                 AddMark(line, i, LinesTotal());
 306         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 307         NotifyModified(mh);
 308 }
 309
 310 void Document::DeleteMark(int line, int markerNum) {
 311         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 312         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 313         NotifyModified(mh);
 314 }
 315
 316 void Document::DeleteMarkFromHandle(int markerHandle) {
 317         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 318         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 319         mh.line = -1;
 320         NotifyModified(mh);
 321 }
 322
 323 void Document::DeleteAllMarks(int markerNum) {
 324         bool someChanges = false;
 325         for (int line = 0; line < LinesTotal(); line++) {
 326                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 327                         someChanges = true;
 328         }
 329         if (someChanges) {
 330                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 331                 mh.line = -1;
 332                 NotifyModified(mh);
 333         }
 334 }
 335
 336 int Document::LineFromHandle(int markerHandle) {
 337         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 338 }
 339
 340 int SCI_METHOD Document::LineStart(int line) const {
 341         return cb.LineStart(line);
 342 }
 343
 344 bool Document::IsLineStartPosition(int position) const {
 345         return LineStart(LineFromPosition(position)) == position;
 346 }
 347
 348 int SCI_METHOD Document::LineEnd(int line) const {
 349         if (line >= LinesTotal() - 1) {
 350                 return LineStart(line + 1);
 351         } else {
 352                 int position = LineStart(line + 1);
 353                 if (SC_CP_UTF8 == dbcsCodePage) {
 354                         unsigned char bytes[] = {
 355                                 static_cast<unsigned char>(cb.CharAt(position-3)),
 356                                 static_cast<unsigned char>(cb.CharAt(position-2)),
 357                                 static_cast<unsigned char>(cb.CharAt(position-1)),
 358                         };
 359                         if (UTF8IsSeparator(bytes)) {
 360                                 return position - UTF8SeparatorLength;
 361                         }
 362                         if (UTF8IsNEL(bytes+1)) {
 363                                 return position - UTF8NELLength;
 364                         }
 365                 }
 366                 position--; // Back over CR or LF
 367                 // When line terminator is CR+LF, may need to go back one more
 368                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 369                         position--;
 370                 }
 371                 return position;
 372         }
 373 }
 374
 375 void SCI_METHOD Document::SetErrorStatus(int status) {
 376         // Tell the watchers an error has occurred.
 377         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 378                 it->watcher->NotifyErrorOccurred(this, it->userData, status);
 379         }
 380 }
 381
 382 int SCI_METHOD Document::LineFromPosition(int pos) const {
 383         return cb.LineFromPosition(pos);
 384 }
 385
 386 int Document::LineEndPosition(int position) const {
 387         return LineEnd(LineFromPosition(position));
 388 }
 389
 390 bool Document::IsLineEndPosition(int position) const {
 391         return LineEnd(LineFromPosition(position)) == position;
 392 }
 393
 394 bool Document::IsPositionInLineEnd(int position) const {
 395         return position >= LineEnd(LineFromPosition(position));
 396 }
 397
 398 int Document::VCHomePosition(int position) const {
 399         int line = LineFromPosition(position);
 400         int startPosition = LineStart(line);
 401         int endLine = LineEnd(line);
 402         int startText = startPosition;
 403         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 404                 startText++;
 405         if (position == startText)
 406                 return startPosition;
 407         else
 408                 return startText;
 409 }
 410
 411 int SCI_METHOD Document::SetLevel(int line, int level) {
 412         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 413         if (prev != level) {
 414                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 415                                    LineStart(line), 0, 0, 0, line);
 416                 mh.foldLevelNow = level;
 417                 mh.foldLevelPrev = prev;
 418                 NotifyModified(mh);
 419         }
 420         return prev;
 421 }
 422
 423 int SCI_METHOD Document::GetLevel(int line) const {
 424         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 425 }
 426
 427 void Document::ClearLevels() {
 428         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 429 }
 430
 431 static bool IsSubordinate(int levelStart, int levelTry) {
 432         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 433                 return true;
 434         else
 435                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 436 }
 437
 438 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 439         if (level == -1)
 440                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 441         int maxLine = LinesTotal();
 442         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 443         int lineMaxSubord = lineParent;
 444         while (lineMaxSubord < maxLine - 1) {
 445                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 446                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 447                         break;
 448                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 449                         break;
 450                 lineMaxSubord++;
 451         }
 452         if (lineMaxSubord > lineParent) {
 453                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 454                         // Have chewed up some whitespace that belongs to a parent so seek back
 455                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 456                                 lineMaxSubord--;
 457                         }
 458                 }
 459         }
 460         return lineMaxSubord;
 461 }
 462
 463 int Document::GetFoldParent(int line) const {
 464         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 465         int lineLook = line - 1;
 466         while ((lineLook > 0) && (
 467                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 468                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 469               ) {
 470                 lineLook--;
 471         }
 472         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 473                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 474                 return lineLook;
 475         } else {
 476                 return -1;
 477         }
 478 }
 479
 480 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
 481         int level = GetLevel(line);
 482         int lookLastLine = Platform::Maximum(line, lastLine) + 1;
 483
 484         int lookLine = line;
 485         int lookLineLevel = level;
 486         int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 487         while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
 488                 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
 489                 lookLineLevel = GetLevel(--lookLine);
 490                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 491         }
 492
 493         int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
 494         if (beginFoldBlock == -1) {
 495                 highlightDelimiter.Clear();
 496                 return;
 497         }
 498
 499         int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
 500         int firstChangeableLineBefore = -1;
 501         if (endFoldBlock < line) {
 502                 lookLine = beginFoldBlock - 1;
 503                 lookLineLevel = GetLevel(lookLine);
 504                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 505                 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
 506                         if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
 507                                 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
 508                                         beginFoldBlock = lookLine;
 509                                         endFoldBlock = line;
 510                                         firstChangeableLineBefore = line - 1;
 511                                 }
 512                         }
 513                         if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
 514                                 break;
 515                         lookLineLevel = GetLevel(--lookLine);
 516                         lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 517                 }
 518         }
 519         if (firstChangeableLineBefore == -1) {
 520                 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 521                         lookLine >= beginFoldBlock;
 522                         lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 523                         if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
 524                                 firstChangeableLineBefore = lookLine;
 525                                 break;
 526                         }
 527                 }
 528         }
 529         if (firstChangeableLineBefore == -1)
 530                 firstChangeableLineBefore = beginFoldBlock - 1;
 531
 532         int firstChangeableLineAfter = -1;
 533         for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 534                 lookLine <= endFoldBlock;
 535                 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 536                 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
 537                         firstChangeableLineAfter = lookLine;
 538                         break;
 539                 }
 540         }
 541         if (firstChangeableLineAfter == -1)
 542                 firstChangeableLineAfter = endFoldBlock + 1;
 543
 544         highlightDelimiter.beginFoldBlock = beginFoldBlock;
 545         highlightDelimiter.endFoldBlock = endFoldBlock;
 546         highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
 547         highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
 548 }
 549
 550 int Document::ClampPositionIntoDocument(int pos) const {
 551         return Platform::Clamp(pos, 0, Length());
 552 }
 553
 554 bool Document::IsCrLf(int pos) const {
 555         if (pos < 0)
 556                 return false;
 557         if (pos >= (Length() - 1))
 558                 return false;
 559         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 560 }
 561
 562 int Document::LenChar(int pos) {
 563         if (pos < 0) {
 564                 return 1;
 565         } else if (IsCrLf(pos)) {
 566                 return 2;
 567         } else if (SC_CP_UTF8 == dbcsCodePage) {
 568                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 569                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 570                 int lengthDoc = Length();
 571                 if ((pos + widthCharBytes) > lengthDoc)
 572                         return lengthDoc - pos;
 573                 else
 574                         return widthCharBytes;
 575         } else if (dbcsCodePage) {
 576                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 577         } else {
 578                 return 1;
 579         }
 580 }
 581
 582 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 583         int trail = pos;
 584         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 585                 trail--;
 586         start = (trail > 0) ? trail-1 : trail;
 587
 588         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 589         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 590         if (widthCharBytes == 1) {
 591                 return false;
 592         } else {
 593                 int trailBytes = widthCharBytes - 1;
 594                 int len = pos - start;
 595                 if (len > trailBytes)
 596                         // pos too far from lead
 597                         return false;
 598                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 599                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 600                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 601                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 602                 if (utf8status & UTF8MaskInvalid)
 603                         return false;
 604                 end = start + widthCharBytes;
 605                 return true;
 606         }
 607 }
 608
 609 // Normalise a position so that it is not halfway through a two byte character.
 610 // This can occur in two situations -
 611 // When lines are terminated with \r\n pairs which should be treated as one character.
 612 // When displaying DBCS text such as Japanese.
 613 // If moving, move the position in the indicated direction.
 614 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
 615         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 616         // If out of range, just return minimum/maximum value.
 617         if (pos <= 0)
 618                 return 0;
 619         if (pos >= Length())
 620                 return Length();
 621
 622         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 623         if (checkLineEnd && IsCrLf(pos - 1)) {
 624                 if (moveDir > 0)
 625                         return pos + 1;
 626                 else
 627                         return pos - 1;
 628         }
 629
 630         if (dbcsCodePage) {
 631                 if (SC_CP_UTF8 == dbcsCodePage) {
 632                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 633                         // If ch is not a trail byte then pos is valid intercharacter position
 634                         if (UTF8IsTrailByte(ch)) {
 635                                 int startUTF = pos;
 636                                 int endUTF = pos;
 637                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 638                                         // ch is a trail byte within a UTF-8 character
 639                                         if (moveDir > 0)
 640                                                 pos = endUTF;
 641                                         else
 642                                                 pos = startUTF;
 643                                 }
 644                                 // Else invalid UTF-8 so return position of isolated trail byte
 645                         }
 646                 } else {
 647                         // Anchor DBCS calculations at start of line because start of line can
 648                         // not be a DBCS trail byte.
 649                         int posStartLine = LineStart(LineFromPosition(pos));
 650                         if (pos == posStartLine)
 651                                 return pos;
 652
 653                         // Step back until a non-lead-byte is found.
 654                         int posCheck = pos;
 655                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 656                                 posCheck--;
 657
 658                         // Check from known start of character.
 659                         while (posCheck < pos) {
 660                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 661                                 if (posCheck + mbsize == pos) {
 662                                         return pos;
 663                                 } else if (posCheck + mbsize > pos) {
 664                                         if (moveDir > 0) {
 665                                                 return posCheck + mbsize;
 666                                         } else {
 667                                                 return posCheck;
 668                                         }
 669                                 }
 670                                 posCheck += mbsize;
 671                         }
 672                 }
 673         }
 674
 675         return pos;
 676 }
 677
 678 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 679 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 680 // A \r\n pair is treated as two characters.
 681 int Document::NextPosition(int pos, int moveDir) const {
 682         // If out of range, just return minimum/maximum value.
 683         int increment = (moveDir > 0) ? 1 : -1;
 684         if (pos + increment <= 0)
 685                 return 0;
 686         if (pos + increment >= Length())
 687                 return Length();
 688
 689         if (dbcsCodePage) {
 690                 if (SC_CP_UTF8 == dbcsCodePage) {
 691                         if (increment == 1) {
 692                                 // Simple forward movement case so can avoid some checks
 693                                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 694                                 if (UTF8IsAscii(leadByte)) {
 695                                         // Single byte character or invalid
 696                                         pos++;
 697                                 } else {
 698                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 699                                         char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 700                                         for (int b=1; b<widthCharBytes; b++)
 701                                                 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
 702                                         int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 703                                         if (utf8status & UTF8MaskInvalid)
 704                                                 pos++;
 705                                         else
 706                                                 pos += utf8status & UTF8MaskWidth;
 707                                 }
 708                         } else {
 709                                 // Examine byte before position
 710                                 pos--;
 711                                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 712                                 // If ch is not a trail byte then pos is valid intercharacter position
 713                                 if (UTF8IsTrailByte(ch)) {
 714                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 715                                         int startUTF = pos;
 716                                         int endUTF = pos;
 717                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 718                                                 pos = startUTF;
 719                                         }
 720                                         // Else invalid UTF-8 so return position of isolated trail byte
 721                                 }
 722                         }
 723                 } else {
 724                         if (moveDir > 0) {
 725                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 726                                 pos += mbsize;
 727                                 if (pos > Length())
 728                                         pos = Length();
 729                         } else {
 730                                 // Anchor DBCS calculations at start of line because start of line can
 731                                 // not be a DBCS trail byte.
 732                                 int posStartLine = LineStart(LineFromPosition(pos));
 733                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 734                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 735                                 if ((pos - 1) <= posStartLine) {
 736                                         return pos - 1;
 737                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 738                                         // Must actually be trail byte
 739                                         return pos - 2;
 740                                 } else {
 741                                         // Otherwise, step back until a non-lead-byte is found.
 742                                         int posTemp = pos - 1;
 743                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 744                                                 ;
 745                                         // Now posTemp+1 must point to the beginning of a character,
 746                                         // so figure out whether we went back an even or an odd
 747                                         // number of bytes and go back 1 or 2 bytes, respectively.
 748                                         return (pos - 1 - ((pos - posTemp) & 1));
 749                                 }
 750                         }
 751                 }
 752         } else {
 753                 pos += increment;
 754         }
 755
 756         return pos;
 757 }
 758
 759 bool Document::NextCharacter(int &pos, int moveDir) const {
 760         // Returns true if pos changed
 761         int posNext = NextPosition(pos, moveDir);
 762         if (posNext == pos) {
 763                 return false;
 764         } else {
 765                 pos = posNext;
 766                 return true;
 767         }
 768 }
 769
 770 // Return -1  on out-of-bounds
 771 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
 772         int pos = positionStart;
 773         if (dbcsCodePage) {
 774                 const int increment = (characterOffset > 0) ? 1 : -1;
 775                 while (characterOffset != 0) {
 776                         const int posNext = NextPosition(pos, increment);
 777                         if (posNext == pos)
 778                                 return INVALID_POSITION;
 779                         pos = posNext;
 780                         characterOffset -= increment;
 781                 }
 782         } else {
 783                 pos = positionStart + characterOffset;
 784                 if ((pos < 0) || (pos > Length()))
 785                         return INVALID_POSITION;
 786         }
 787         return pos;
 788 }
 789
 790 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
 791         int pos = positionStart;
 792         if (dbcsCodePage) {
 793                 const int increment = (characterOffset > 0) ? 1 : -1;
 794                 while (characterOffset != 0) {
 795                         const int posNext = NextPosition(pos, increment);
 796                         if (posNext == pos)
 797                                 return INVALID_POSITION;
 798                         if (abs(pos-posNext) > 3)       // 4 byte character = 2*UTF16.
 799                                 characterOffset -= increment;
 800                         pos = posNext;
 801                         characterOffset -= increment;
 802                 }
 803         } else {
 804                 pos = positionStart + characterOffset;
 805                 if ((pos < 0) || (pos > Length()))
 806                         return INVALID_POSITION;
 807         }
 808         return pos;
 809 }
 810
 811 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
 812         int character;
 813         int bytesInCharacter = 1;
 814         if (dbcsCodePage) {
 815                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 816                 if (SC_CP_UTF8 == dbcsCodePage) {
 817                         if (UTF8IsAscii(leadByte)) {
 818                                 // Single byte character or invalid
 819                                 character =  leadByte;
 820                         } else {
 821                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 822                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 823                                 for (int b=1; b<widthCharBytes; b++)
 824                                         charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 825                                 int utf8status = UTF8Classify(charBytes, widthCharBytes);
 826                                 if (utf8status & UTF8MaskInvalid) {
 827                                         // Report as singleton surrogate values which are invalid Unicode
 828                                         character =  0xDC80 + leadByte;
 829                                 } else {
 830                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 831                                         character = UnicodeFromUTF8(charBytes);
 832                                 }
 833                         }
 834                 } else {
 835                         if (IsDBCSLeadByte(leadByte)) {
 836                                 bytesInCharacter = 2;
 837                                 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 838                         } else {
 839                                 character = leadByte;
 840                         }
 841                 }
 842         } else {
 843                 character = cb.CharAt(position);
 844         }
 845         if (pWidth) {
 846                 *pWidth = bytesInCharacter;
 847         }
 848         return character;
 849 }
 850
 851 int SCI_METHOD Document::CodePage() const {
 852         return dbcsCodePage;
 853 }
 854
 855 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 856         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 857         unsigned char uch = static_cast<unsigned char>(ch);
 858         switch (dbcsCodePage) {
 859                 case 932:
 860                         // Shift_jis
 861                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 862                                 ((uch >= 0xE0) && (uch <= 0xFC));
 863                                 // Lead bytes F0 to FC may be a Microsoft addition.
 864                 case 936:
 865                         // GBK
 866                         return (uch >= 0x81) && (uch <= 0xFE);
 867                 case 949:
 868                         // Korean Wansung KS C-5601-1987
 869                         return (uch >= 0x81) && (uch <= 0xFE);
 870                 case 950:
 871                         // Big5
 872                         return (uch >= 0x81) && (uch <= 0xFE);
 873                 case 1361:
 874                         // Korean Johab KS C-5601-1992
 875                         return
 876                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 877                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 878                                 ((uch >= 0xE0) && (uch <= 0xF9));
 879         }
 880         return false;
 881 }
 882
 883 static inline bool IsSpaceOrTab(int ch) {
 884         return ch == ' ' || ch == '\t';
 885 }
 886
 887 // Need to break text into segments near lengthSegment but taking into
 888 // account the encoding to not break inside a UTF-8 or DBCS character
 889 // and also trying to avoid breaking inside a pair of combining characters.
 890 // The segment length must always be long enough (more than 4 bytes)
 891 // so that there will be at least one whole character to make a segment.
 892 // For UTF-8, text must consist only of valid whole characters.
 893 // In preference order from best to worst:
 894 //   1) Break after space
 895 //   2) Break before punctuation
 896 //   3) Break after whole character
 897
 898 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
 899         if (length <= lengthSegment)
 900                 return length;
 901         int lastSpaceBreak = -1;
 902         int lastPunctuationBreak = -1;
 903         int lastEncodingAllowedBreak = 0;
 904         for (int j=0; j < lengthSegment;) {
 905                 unsigned char ch = static_cast<unsigned char>(text[j]);
 906                 if (j > 0) {
 907                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 908                                 lastSpaceBreak = j;
 909                         }
 910                         if (ch < 'A') {
 911                                 lastPunctuationBreak = j;
 912                         }
 913                 }
 914                 lastEncodingAllowedBreak = j;
 915
 916                 if (dbcsCodePage == SC_CP_UTF8) {
 917                         j += UTF8BytesOfLead[ch];
 918                 } else if (dbcsCodePage) {
 919                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 920                 } else {
 921                         j++;
 922                 }
 923         }
 924         if (lastSpaceBreak >= 0) {
 925                 return lastSpaceBreak;
 926         } else if (lastPunctuationBreak >= 0) {
 927                 return lastPunctuationBreak;
 928         }
 929         return lastEncodingAllowedBreak;
 930 }
 931
 932 EncodingFamily Document::CodePageFamily() const {
 933         if (SC_CP_UTF8 == dbcsCodePage)
 934                 return efUnicode;
 935         else if (dbcsCodePage)
 936                 return efDBCS;
 937         else
 938                 return efEightBit;
 939 }
 940
 941 void Document::ModifiedAt(int pos) {
 942         if (endStyled > pos)
 943                 endStyled = pos;
 944 }
 945
 946 void Document::CheckReadOnly() {
 947         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 948                 enteredReadOnlyCount++;
 949                 NotifyModifyAttempt();
 950                 enteredReadOnlyCount--;
 951         }
 952 }
 953
 954 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 955 // SetStyleAt does not change the persistent state of a document
 956
 957 bool Document::DeleteChars(int pos, int len) {
 958         if (pos < 0)
 959                 return false;
 960         if (len <= 0)
 961                 return false;
 962         if ((pos + len) > Length())
 963                 return false;
 964         CheckReadOnly();
 965         if (enteredModification != 0) {
 966                 return false;
 967         } else {
 968                 enteredModification++;
 969                 if (!cb.IsReadOnly()) {
 970                         NotifyModified(
 971                             DocModification(
 972                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 973                                 pos, len,
 974                                 0, 0));
 975                         int prevLinesTotal = LinesTotal();
 976                         bool startSavePoint = cb.IsSavePoint();
 977                         bool startSequence = false;
 978                         const char *text = cb.DeleteChars(pos, len, startSequence);
 979                         if (startSavePoint && cb.IsCollectingUndo())
 980                                 NotifySavePoint(!startSavePoint);
 981                         if ((pos < Length()) || (pos == 0))
 982                                 ModifiedAt(pos);
 983                         else
 984                                 ModifiedAt(pos-1);
 985                         NotifyModified(
 986                             DocModification(
 987                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 988                                 pos, len,
 989                                 LinesTotal() - prevLinesTotal, text));
 990                 }
 991                 enteredModification--;
 992         }
 993         return !cb.IsReadOnly();
 994 }
 995
 996 /**
 997  * Insert a string with a length.
 998  */
 999 int Document::InsertString(int position, const char *s, int insertLength) {
1000         if (insertLength <= 0) {
1001                 return 0;
1002         }
1003         CheckReadOnly();        // Application may change read only state here
1004         if (cb.IsReadOnly()) {
1005                 return 0;
1006         }
1007         if (enteredModification != 0) {
1008                 return 0;
1009         }
1010         enteredModification++;
1011         insertionSet = false;
1012         insertion.clear();
1013         NotifyModified(
1014                 DocModification(
1015                         SC_MOD_INSERTCHECK,
1016                         position, insertLength,
1017                         0, s));
1018         if (insertionSet) {
1019                 s = insertion.c_str();
1020                 insertLength = static_cast<int>(insertion.length());
1021         }
1022         NotifyModified(
1023                 DocModification(
1024                         SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1025                         position, insertLength,
1026                         0, s));
1027         int prevLinesTotal = LinesTotal();
1028         bool startSavePoint = cb.IsSavePoint();
1029         bool startSequence = false;
1030         const char *text = cb.InsertString(position, s, insertLength, startSequence);
1031         if (startSavePoint && cb.IsCollectingUndo())
1032                 NotifySavePoint(!startSavePoint);
1033         ModifiedAt(position);
1034         NotifyModified(
1035                 DocModification(
1036                         SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1037                         position, insertLength,
1038                         LinesTotal() - prevLinesTotal, text));
1039         if (insertionSet) {     // Free memory as could be large
1040                 std::string().swap(insertion);
1041         }
1042         enteredModification--;
1043         return insertLength;
1044 }
1045
1046 void Document::ChangeInsertion(const char *s, int length) {
1047         insertionSet = true;
1048         insertion.assign(s, length);
1049 }
1050
1051 int SCI_METHOD Document::AddData(char *data, int length) {
1052         try {
1053                 int position = Length();
1054                 InsertString(position, data, length);
1055         } catch (std::bad_alloc &) {
1056                 return SC_STATUS_BADALLOC;
1057         } catch (...) {
1058                 return SC_STATUS_FAILURE;
1059         }
1060         return 0;
1061 }
1062
1063 void * SCI_METHOD Document::ConvertToDocument() {
1064         return this;
1065 }
1066
1067 int Document::Undo() {
1068         int newPos = -1;
1069         CheckReadOnly();
1070         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1071                 enteredModification++;
1072                 if (!cb.IsReadOnly()) {
1073                         bool startSavePoint = cb.IsSavePoint();
1074                         bool multiLine = false;
1075                         int steps = cb.StartUndo();
1076                         //Platform::DebugPrintf("Steps=%d\n", steps);
1077                         int coalescedRemovePos = -1;
1078                         int coalescedRemoveLen = 0;
1079                         int prevRemoveActionPos = -1;
1080                         int prevRemoveActionLen = 0;
1081                         for (int step = 0; step < steps; step++) {
1082                                 const int prevLinesTotal = LinesTotal();
1083                                 const Action &action = cb.GetUndoStep();
1084                                 if (action.at == removeAction) {
1085                                         NotifyModified(DocModification(
1086                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1087                                 } else if (action.at == containerAction) {
1088                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1089                                         dm.token = action.position;
1090                                         NotifyModified(dm);
1091                                         if (!action.mayCoalesce) {
1092                                                 coalescedRemovePos = -1;
1093                                                 coalescedRemoveLen = 0;
1094                                                 prevRemoveActionPos = -1;
1095                                                 prevRemoveActionLen = 0;
1096                                         }
1097                                 } else {
1098                                         NotifyModified(DocModification(
1099                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1100                                 }
1101                                 cb.PerformUndoStep();
1102                                 if (action.at != containerAction) {
1103                                         ModifiedAt(action.position);
1104                                         newPos = action.position;
1105                                 }
1106
1107                                 int modFlags = SC_PERFORMED_UNDO;
1108                                 // With undo, an insertion action becomes a deletion notification
1109                                 if (action.at == removeAction) {
1110                                         newPos += action.lenData;
1111                                         modFlags |= SC_MOD_INSERTTEXT;
1112                                         if ((coalescedRemoveLen > 0) &&
1113                                                 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1114                                                 coalescedRemoveLen += action.lenData;
1115                                                 newPos = coalescedRemovePos + coalescedRemoveLen;
1116                                         } else {
1117                                                 coalescedRemovePos = action.position;
1118                                                 coalescedRemoveLen = action.lenData;
1119                                         }
1120                                         prevRemoveActionPos = action.position;
1121                                         prevRemoveActionLen = action.lenData;
1122                                 } else if (action.at == insertAction) {
1123                                         modFlags |= SC_MOD_DELETETEXT;
1124                                         coalescedRemovePos = -1;
1125                                         coalescedRemoveLen = 0;
1126                                         prevRemoveActionPos = -1;
1127                                         prevRemoveActionLen = 0;
1128                                 }
1129                                 if (steps > 1)
1130                                         modFlags |= SC_MULTISTEPUNDOREDO;
1131                                 const int linesAdded = LinesTotal() - prevLinesTotal;
1132                                 if (linesAdded != 0)
1133                                         multiLine = true;
1134                                 if (step == steps - 1) {
1135                                         modFlags |= SC_LASTSTEPINUNDOREDO;
1136                                         if (multiLine)
1137                                                 modFlags |= SC_MULTILINEUNDOREDO;
1138                                 }
1139                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1140                                                                                            linesAdded, action.data));
1141                         }
1142
1143                         bool endSavePoint = cb.IsSavePoint();
1144                         if (startSavePoint != endSavePoint)
1145                                 NotifySavePoint(endSavePoint);
1146                 }
1147                 enteredModification--;
1148         }
1149         return newPos;
1150 }
1151
1152 int Document::Redo() {
1153         int newPos = -1;
1154         CheckReadOnly();
1155         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1156                 enteredModification++;
1157                 if (!cb.IsReadOnly()) {
1158                         bool startSavePoint = cb.IsSavePoint();
1159                         bool multiLine = false;
1160                         int steps = cb.StartRedo();
1161                         for (int step = 0; step < steps; step++) {
1162                                 const int prevLinesTotal = LinesTotal();
1163                                 const Action &action = cb.GetRedoStep();
1164                                 if (action.at == insertAction) {
1165                                         NotifyModified(DocModification(
1166                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1167                                 } else if (action.at == containerAction) {
1168                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1169                                         dm.token = action.position;
1170                                         NotifyModified(dm);
1171                                 } else {
1172                                         NotifyModified(DocModification(
1173                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1174                                 }
1175                                 cb.PerformRedoStep();
1176                                 if (action.at != containerAction) {
1177                                         ModifiedAt(action.position);
1178                                         newPos = action.position;
1179                                 }
1180
1181                                 int modFlags = SC_PERFORMED_REDO;
1182                                 if (action.at == insertAction) {
1183                                         newPos += action.lenData;
1184                                         modFlags |= SC_MOD_INSERTTEXT;
1185                                 } else if (action.at == removeAction) {
1186                                         modFlags |= SC_MOD_DELETETEXT;
1187                                 }
1188                                 if (steps > 1)
1189                                         modFlags |= SC_MULTISTEPUNDOREDO;
1190                                 const int linesAdded = LinesTotal() - prevLinesTotal;
1191                                 if (linesAdded != 0)
1192                                         multiLine = true;
1193                                 if (step == steps - 1) {
1194                                         modFlags |= SC_LASTSTEPINUNDOREDO;
1195                                         if (multiLine)
1196                                                 modFlags |= SC_MULTILINEUNDOREDO;
1197                                 }
1198                                 NotifyModified(
1199                                         DocModification(modFlags, action.position, action.lenData,
1200                                                                         linesAdded, action.data));
1201                         }
1202
1203                         bool endSavePoint = cb.IsSavePoint();
1204                         if (startSavePoint != endSavePoint)
1205                                 NotifySavePoint(endSavePoint);
1206                 }
1207                 enteredModification--;
1208         }
1209         return newPos;
1210 }
1211
1212 void Document::DelChar(int pos) {
1213         DeleteChars(pos, LenChar(pos));
1214 }
1215
1216 void Document::DelCharBack(int pos) {
1217         if (pos <= 0) {
1218                 return;
1219         } else if (IsCrLf(pos - 2)) {
1220                 DeleteChars(pos - 2, 2);
1221         } else if (dbcsCodePage) {
1222                 int startChar = NextPosition(pos, -1);
1223                 DeleteChars(startChar, pos - startChar);
1224         } else {
1225                 DeleteChars(pos - 1, 1);
1226         }
1227 }
1228
// Column of the first tab stop strictly after pos, with stops every tabSize columns.
// tabSize must not be zero.
static int NextTab(int pos, int tabSize) {
        const int currentStop = pos - (pos % tabSize);
        return currentStop + tabSize;
}
1232
// Build the white space for an indentation of indent columns.
// When insertSpaces is false, as many tabs as fit (one per tabSize columns) come
// first and the remainder is padded with spaces; otherwise only spaces are used.
// A non-positive indent produces an empty string. A non-positive tabSize can not
// describe a tab stop, so the indentation is then made of spaces only (the
// previous loop never terminated in that case).
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
        std::string indentation;
        if (indent <= 0)
                return indentation;
        int spaces = indent;
        if (!insertSpaces && (tabSize > 0)) {
                indentation.assign(static_cast<std::string::size_type>(indent / tabSize), '\t');
                spaces = indent % tabSize;
        }
        indentation.append(static_cast<std::string::size_type>(spaces), ' ');
        return indentation;
}
1247
1248 int SCI_METHOD Document::GetLineIndentation(int line) {
1249         int indent = 0;
1250         if ((line >= 0) && (line < LinesTotal())) {
1251                 int lineStart = LineStart(line);
1252                 int length = Length();
1253                 for (int i = lineStart; i < length; i++) {
1254                         char ch = cb.CharAt(i);
1255                         if (ch == ' ')
1256                                 indent++;
1257                         else if (ch == '\t')
1258                                 indent = NextTab(indent, tabInChars);
1259                         else
1260                                 return indent;
1261                 }
1262         }
1263         return indent;
1264 }
1265
1266 int Document::SetLineIndentation(int line, int indent) {
1267         int indentOfLine = GetLineIndentation(line);
1268         if (indent < 0)
1269                 indent = 0;
1270         if (indent != indentOfLine) {
1271                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1272                 int thisLineStart = LineStart(line);
1273                 int indentPos = GetLineIndentPosition(line);
1274                 UndoGroup ug(this);
1275                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1276                 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1277                         static_cast<int>(linebuf.length()));
1278         } else {
1279                 return GetLineIndentPosition(line);
1280         }
1281 }
1282
1283 int Document::GetLineIndentPosition(int line) const {
1284         if (line < 0)
1285                 return 0;
1286         int pos = LineStart(line);
1287         int length = Length();
1288         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1289                 pos++;
1290         }
1291         return pos;
1292 }
1293
1294 int Document::GetColumn(int pos) {
1295         int column = 0;
1296         int line = LineFromPosition(pos);
1297         if ((line >= 0) && (line < LinesTotal())) {
1298                 for (int i = LineStart(line); i < pos;) {
1299                         char ch = cb.CharAt(i);
1300                         if (ch == '\t') {
1301                                 column = NextTab(column, tabInChars);
1302                                 i++;
1303                         } else if (ch == '\r') {
1304                                 return column;
1305                         } else if (ch == '\n') {
1306                                 return column;
1307                         } else if (i >= Length()) {
1308                                 return column;
1309                         } else {
1310                                 column++;
1311                                 i = NextPosition(i, 1);
1312                         }
1313                 }
1314         }
1315         return column;
1316 }
1317
1318 int Document::CountCharacters(int startPos, int endPos) const {
1319         startPos = MovePositionOutsideChar(startPos, 1, false);
1320         endPos = MovePositionOutsideChar(endPos, -1, false);
1321         int count = 0;
1322         int i = startPos;
1323         while (i < endPos) {
1324                 count++;
1325                 if (IsCrLf(i))
1326                         i++;
1327                 i = NextPosition(i, 1);
1328         }
1329         return count;
1330 }
1331
1332 int Document::CountUTF16(int startPos, int endPos) const {
1333         startPos = MovePositionOutsideChar(startPos, 1, false);
1334         endPos = MovePositionOutsideChar(endPos, -1, false);
1335         int count = 0;
1336         int i = startPos;
1337         while (i < endPos) {
1338                 count++;
1339                 const int next = NextPosition(i, 1);
1340                 if ((next - i) > 3)
1341                         count++;
1342                 i = next;
1343         }
1344         return count;
1345 }
1346
1347 int Document::FindColumn(int line, int column) {
1348         int position = LineStart(line);
1349         if ((line >= 0) && (line < LinesTotal())) {
1350                 int columnCurrent = 0;
1351                 while ((columnCurrent < column) && (position < Length())) {
1352                         char ch = cb.CharAt(position);
1353                         if (ch == '\t') {
1354                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1355                                 if (columnCurrent > column)
1356                                         return position;
1357                                 position++;
1358                         } else if (ch == '\r') {
1359                                 return position;
1360                         } else if (ch == '\n') {
1361                                 return position;
1362                         } else {
1363                                 columnCurrent++;
1364                                 position = NextPosition(position, 1);
1365                         }
1366                 }
1367         }
1368         return position;
1369 }
1370
1371 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1372         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1373         for (int line = lineBottom; line >= lineTop; line--) {
1374                 int indentOfLine = GetLineIndentation(line);
1375                 if (forwards) {
1376                         if (LineStart(line) < LineEnd(line)) {
1377                                 SetLineIndentation(line, indentOfLine + IndentSize());
1378                         }
1379                 } else {
1380                         SetLineIndentation(line, indentOfLine - IndentSize());
1381                 }
1382         }
1383 }
1384
1385 // Convert line endings for a piece of text to a particular mode.
1386 // Stop at len or when a NUL is found.
1387 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1388         std::string dest;
1389         for (size_t i = 0; (i < len) && (s[i]); i++) {
1390                 if (s[i] == '\n' || s[i] == '\r') {
1391                         if (eolModeWanted == SC_EOL_CR) {
1392                                 dest.push_back('\r');
1393                         } else if (eolModeWanted == SC_EOL_LF) {
1394                                 dest.push_back('\n');
1395                         } else { // eolModeWanted == SC_EOL_CRLF
1396                                 dest.push_back('\r');
1397                                 dest.push_back('\n');
1398                         }
1399                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1400                                 i++;
1401                         }
1402                 } else {
1403                         dest.push_back(s[i]);
1404                 }
1405         }
1406         return dest;
1407 }
1408
/**
 * Convert the line ends in the document to the mode given by eolModeSet
 * (SC_EOL_CR, SC_EOL_LF or SC_EOL_CRLF).
 * The document is scanned from the start and edited in place with
 * InsertString and DeleteChars; an UndoGroup is held for the whole scan.
 */
void Document::ConvertLineEnds(int eolModeSet) {
        UndoGroup ug(this);

        // Length() is re-read on each iteration as the edits below change it
        for (int pos = 0; pos < Length(); pos++) {
                if (cb.CharAt(pos) == '\r') {
                        if (cb.CharAt(pos + 1) == '\n') {
                                // CRLF
                                if (eolModeSet == SC_EOL_CR) {
                                        DeleteChars(pos + 1, 1); // Delete the LF
                                } else if (eolModeSet == SC_EOL_LF) {
                                        DeleteChars(pos, 1); // Delete the CR
                                } else {
                                        // Already the wanted form: step onto the LF so the loop moves past the pair
                                        pos++;
                                }
                        } else {
                                // CR
                                if (eolModeSet == SC_EOL_CRLF) {
                                        // pos is advanced by InsertString's return value, presumably the
                                        // number of bytes inserted (TODO confirm), so the new LF is skipped
                                        pos += InsertString(pos + 1, "\n", 1); // Insert LF
                                } else if (eolModeSet == SC_EOL_LF) {
                                        // Insert the replacement before removing the original character
                                        pos += InsertString(pos, "\n", 1); // Insert LF
                                        DeleteChars(pos, 1); // Delete CR
                                        pos--;
                                }
                        }
                } else if (cb.CharAt(pos) == '\n') {
                        // LF
                        if (eolModeSet == SC_EOL_CRLF) {
                                pos += InsertString(pos, "\r", 1); // Insert CR
                        } else if (eolModeSet == SC_EOL_CR) {
                                // Insert the replacement before removing the original character
                                pos += InsertString(pos, "\r", 1); // Insert CR
                                DeleteChars(pos, 1); // Delete LF
                                pos--;
                        }
                }
        }

}
1446
1447 bool Document::IsWhiteLine(int line) const {
1448         int currentChar = LineStart(line);
1449         int endLine = LineEnd(line);
1450         while (currentChar < endLine) {
1451                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1452                         return false;
1453                 }
1454                 ++currentChar;
1455         }
1456         return true;
1457 }
1458
1459 int Document::ParaUp(int pos) const {
1460         int line = LineFromPosition(pos);
1461         line--;
1462         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1463                 line--;
1464         }
1465         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1466                 line--;
1467         }
1468         line++;
1469         return LineStart(line);
1470 }
1471
1472 int Document::ParaDown(int pos) const {
1473         int line = LineFromPosition(pos);
1474         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1475                 line++;
1476         }
1477         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1478                 line++;
1479         }
1480         if (line < LinesTotal())
1481                 return LineStart(line);
1482         else // end of a document
1483                 return LineEnd(line-1);
1484 }
1485
1486 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1487         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1488                 return CharClassify::ccWord;
1489         return charClass.GetClass(ch);
1490 }
1491
1492 /**
1493  * Used by commmands that want to select whole words.
1494  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1495  */
1496 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1497         CharClassify::cc ccStart = CharClassify::ccWord;
1498         if (delta < 0) {
1499                 if (!onlyWordCharacters)
1500                         ccStart = WordCharClass(cb.CharAt(pos-1));
1501                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1502                         pos--;
1503         } else {
1504                 if (!onlyWordCharacters && pos < Length())
1505                         ccStart = WordCharClass(cb.CharAt(pos));
1506                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1507                         pos++;
1508         }
1509         return MovePositionOutsideChar(pos, delta, true);
1510 }
1511
1512 /**
1513  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1514  * (delta < 0).
1515  * This is looking for a transition between character classes although there is also some
1516  * additional movement to transit white space.
1517  * Used by cursor movement by word commands.
1518  */
1519 int Document::NextWordStart(int pos, int delta) {
1520         if (delta < 0) {
1521                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1522                         pos--;
1523                 if (pos > 0) {
1524                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1525                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1526                                 pos--;
1527                         }
1528                 }
1529         } else {
1530                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1531                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1532                         pos++;
1533                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1534                         pos++;
1535         }
1536         return pos;
1537 }
1538
1539 /**
1540  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1541  * (delta < 0).
1542  * This is looking for a transition between character classes although there is also some
1543  * additional movement to transit white space.
1544  * Used by cursor movement by word commands.
1545  */
1546 int Document::NextWordEnd(int pos, int delta) {
1547         if (delta < 0) {
1548                 if (pos > 0) {
1549                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1550                         if (ccStart != CharClassify::ccSpace) {
1551                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1552                                         pos--;
1553                                 }
1554                         }
1555                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1556                                 pos--;
1557                         }
1558                 }
1559         } else {
1560                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1561                         pos++;
1562                 }
1563                 if (pos < Length()) {
1564                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1565                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1566                                 pos++;
1567                         }
1568                 }
1569         }
1570         return pos;
1571 }
1572
1573 /**
1574  * Check that the character at the given position is a word or punctuation character and that
1575  * the previous character is of a different character class.
1576  */
1577 bool Document::IsWordStartAt(int pos) const {
1578         if (pos > 0) {
1579                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1580                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1581                         (ccPos != WordCharClass(CharAt(pos - 1)));
1582         }
1583         return true;
1584 }
1585
1586 /**
1587  * Check that the character at the given position is a word or punctuation character and that
1588  * the next character is of a different character class.
1589  */
1590 bool Document::IsWordEndAt(int pos) const {
1591         if (pos < Length()) {
1592                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1593                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1594                         (ccPrev != WordCharClass(CharAt(pos)));
1595         }
1596         return true;
1597 }
1598
1599 /**
1600  * Check that the given range is has transitions between character classes at both
1601  * ends and where the characters on the inside are word or punctuation characters.
1602  */
1603 bool Document::IsWordAt(int start, int end) const {
1604         return IsWordStartAt(start) && IsWordEndAt(end);
1605 }
1606
1607 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1608         return (!word && !wordStart) ||
1609                         (word && IsWordAt(pos, pos + length)) ||
1610                         (wordStart && IsWordStartAt(pos));
1611 }
1612
/**
 * Report whether a case folder is currently set, that is whether pcf is non-null.
 */
bool Document::HasCaseFolder(void) const {
        return pcf != 0;
}
1616
1617 void Document::SetCaseFolder(CaseFolder *pcf_) {
1618         delete pcf;
1619         pcf = pcf_;
1620 }
1621
1622 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1623         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1624         if (UTF8IsAscii(leadByte)) {
1625                 // Common case: ASCII character
1626                 return CharacterExtracted(leadByte, 1);
1627         }
1628         const int widthCharBytes = UTF8BytesOfLead[leadByte];
1629         unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1630         for (int b=1; b<widthCharBytes; b++)
1631                 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1632         int utf8status = UTF8Classify(charBytes, widthCharBytes);
1633         if (utf8status & UTF8MaskInvalid) {
1634                 // Treat as invalid and use up just one byte
1635                 return CharacterExtracted(unicodeReplacementChar, 1);
1636         } else {
1637                 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1638         }
1639 }
1640
1641 /**
1642  * Find text in document, supporting both forward and backward
1643  * searches (just pass minPos > maxPos to do a backward search)
1644  * Has not been tested with backwards DBCS searches yet.
1645  */
1646 long Document::FindText(int minPos, int maxPos, const char *search,
1647                         bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1648                         int *length) {
1649         if (*length <= 0)
1650                 return minPos;
1651         if (regExp) {
1652                 if (!regex)
1653                         regex = CreateRegexSearch(&charClass);
1654                 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1655         } else {
1656
1657                 const bool forward = minPos <= maxPos;
1658                 const int increment = forward ? 1 : -1;
1659
1660                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1661                 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1662                 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1663
1664                 // Compute actual search ranges needed
1665                 const int lengthFind = *length;
1666
1667                 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1668                 const int limitPos = Platform::Maximum(startPos, endPos);
1669                 int pos = startPos;
1670                 if (!forward) {
1671                         // Back all of a character
1672                         pos = NextPosition(pos, increment);
1673                 }
1674                 if (caseSensitive) {
1675                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1676                         const char charStartSearch =  search[0];
1677                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1678                                 if (CharAt(pos) == charStartSearch) {
1679                                         bool found = (pos + lengthFind) <= limitPos;
1680                                         for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1681                                                 found = CharAt(pos + indexSearch) == search[indexSearch];
1682                                         }
1683                                         if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1684                                                 return pos;
1685                                         }
1686                                 }
1687                                 if (!NextCharacter(pos, increment))
1688                                         break;
1689                         }
1690                 } else if (SC_CP_UTF8 == dbcsCodePage) {
1691                         const size_t maxFoldingExpansion = 4;
1692                         std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1693                         const int lenSearch = static_cast<int>(
1694                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1695                         char bytes[UTF8MaxBytes + 1];
1696                         char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1697                         while (forward ? (pos < endPos) : (pos >= endPos)) {
1698                                 int widthFirstCharacter = 0;
1699                                 int posIndexDocument = pos;
1700                                 int indexSearch = 0;
1701                                 bool characterMatches = true;
1702                                 for (;;) {
1703                                         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1704                                         bytes[0] = leadByte;
1705                                         int widthChar = 1;
1706                                         if (!UTF8IsAscii(leadByte)) {
1707                                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1708                                                 for (int b=1; b<widthCharBytes; b++) {
1709                                                         bytes[b] = cb.CharAt(posIndexDocument+b);
1710                                                 }
1711                                                 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1712                                         }
1713                                         if (!widthFirstCharacter)
1714                                                 widthFirstCharacter = widthChar;
1715                                         if ((posIndexDocument + widthChar) > limitPos)
1716                                                 break;
1717                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1718                                         folded[lenFlat] = 0;
1719                                         // Does folded match the buffer
1720                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1721                                         if (!characterMatches)
1722                                                 break;
1723                                         posIndexDocument += widthChar;
1724                                         indexSearch += lenFlat;
1725                                         if (indexSearch >= lenSearch)
1726                                                 break;
1727                                 }
1728                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1729                                         if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1730                                                 *length = posIndexDocument - pos;
1731                                                 return pos;
1732                                         }
1733                                 }
1734                                 if (forward) {
1735                                         pos += widthFirstCharacter;
1736                                 } else {
1737                                         if (!NextCharacter(pos, increment))
1738                                                 break;
1739                                 }
1740                         }
1741                 } else if (dbcsCodePage) {
1742                         const size_t maxBytesCharacter = 2;
1743                         const size_t maxFoldingExpansion = 4;
1744                         std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1745                         const int lenSearch = static_cast<int>(
1746                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1747                         while (forward ? (pos < endPos) : (pos >= endPos)) {
1748                                 int indexDocument = 0;
1749                                 int indexSearch = 0;
1750                                 bool characterMatches = true;
1751                                 while (characterMatches &&
1752                                         ((pos + indexDocument) < limitPos) &&
1753                                         (indexSearch < lenSearch)) {
1754                                         char bytes[maxBytesCharacter + 1];
1755                                         bytes[0] = cb.CharAt(pos + indexDocument);
1756                                         const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1757                                         if (widthChar == 2)
1758                                                 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1759                                         if ((pos + indexDocument + widthChar) > limitPos)
1760                                                 break;
1761                                         char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1762                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1763                                         folded[lenFlat] = 0;
1764                                         // Does folded match the buffer
1765                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1766                                         indexDocument += widthChar;
1767                                         indexSearch += lenFlat;
1768                                 }
1769                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1770                                         if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1771                                                 *length = indexDocument;
1772                                                 return pos;
1773                                         }
1774                                 }
1775                                 if (!NextCharacter(pos, increment))
1776                                         break;
1777                         }
1778                 } else {
1779                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1780                         std::vector<char> searchThing(lengthFind + 1);
1781                         pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1782                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1783                                 bool found = (pos + lengthFind) <= limitPos;
1784                                 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1785                                         char ch = CharAt(pos + indexSearch);
1786                                         char folded[2];
1787                                         pcf->Fold(folded, sizeof(folded), &ch, 1);
1788                                         found = folded[0] == searchThing[indexSearch];
1789                                 }
1790                                 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1791                                         return pos;
1792                                 }
1793                                 if (!NextCharacter(pos, increment))
1794                                         break;
1795                         }
1796                 }
1797         }
1798         //Platform::DebugPrintf("Not found\n");
1799         return -1;
1800 }
1801
1802 const char *Document::SubstituteByPosition(const char *text, int *length) {
1803         if (regex)
1804                 return regex->SubstituteByPosition(this, text, length);
1805         else
1806                 return 0;
1807 }
1808
/**
 * Return the number of lines in the document, as reported by the cell buffer.
 */
int Document::LinesTotal() const {
        return cb.Lines();
}
1812
/**
 * Reset the character classification table to its defaults.
 * includeWordClass is passed straight through to charClass.
 */
void Document::SetDefaultCharClasses(bool includeWordClass) {
    charClass.SetDefaultCharClasses(includeWordClass);
}
1816
/**
 * Assign newCharClass to the characters listed in chars, by forwarding both
 * arguments to charClass.
 */
void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
    charClass.SetCharClasses(chars, newCharClass);
}
1820
/**
 * Forward a query for the characters of characterClass to charClass,
 * passing buffer along and returning whatever charClass returns.
 */
int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
    return charClass.GetCharsOfClass(characterClass, buffer);
}
1824
/**
 * Set the position from which following SetStyleFor and SetStyles calls apply
 * their styles. The second argument is ignored.
 */
void SCI_METHOD Document::StartStyling(int position, char) {
        endStyled = position;
}
1828
1829 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1830         if (enteredStyling != 0) {
1831                 return false;
1832         } else {
1833                 enteredStyling++;
1834                 int prevEndStyled = endStyled;
1835                 if (cb.SetStyleFor(endStyled, length, style)) {
1836                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1837                                            prevEndStyled, length);
1838                         NotifyModified(mh);
1839                 }
1840                 endStyled += length;
1841                 enteredStyling--;
1842                 return true;
1843         }
1844 }
1845
1846 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1847         if (enteredStyling != 0) {
1848                 return false;
1849         } else {
1850                 enteredStyling++;
1851                 bool didChange = false;
1852                 int startMod = 0;
1853                 int endMod = 0;
1854                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1855                         PLATFORM_ASSERT(endStyled < Length());
1856                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
1857                                 if (!didChange) {
1858                                         startMod = endStyled;
1859                                 }
1860                                 didChange = true;
1861                                 endMod = endStyled;
1862                         }
1863                 }
1864                 if (didChange) {
1865                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1866                                            startMod, endMod - startMod + 1);
1867                         NotifyModified(mh);
1868                 }
1869                 enteredStyling--;
1870                 return true;
1871         }
1872 }
1873
1874 void Document::EnsureStyledTo(int pos) {
1875         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1876                 IncrementStyleClock();
1877                 if (pli && !pli->UseContainerLexing()) {
1878                         int lineEndStyled = LineFromPosition(GetEndStyled());
1879                         int endStyledTo = LineStart(lineEndStyled);
1880                         pli->Colourise(endStyledTo, pos);
1881                 } else {
1882                         // Ask the watchers to style, and stop as soon as one responds.
1883                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1884                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1885                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1886                         }
1887                 }
1888         }
1889 }
1890
1891 void Document::LexerChanged() {
1892         // Tell the watchers the lexer has changed.
1893         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1894                 it->watcher->NotifyLexerChanged(this, it->userData);
1895         }
1896 }
1897
1898 int SCI_METHOD Document::SetLineState(int line, int state) {
1899         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1900         if (state != statePrevious) {
1901                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1902                 NotifyModified(mh);
1903         }
1904         return statePrevious;
1905 }
1906
/**
 * Return the state value stored for a line in the ldState per-line data.
 */
int SCI_METHOD Document::GetLineState(int line) const {
        return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
}
1910
/**
 * Return the result of GetMaxLineState() on the ldState per-line data.
 */
int Document::GetMaxLineState() {
        return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
}
1914
1915 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1916         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1917         NotifyModified(mh);
1918 }
1919
/**
 * Build a StyledText from the margin data stored for a line: its length,
 * text, multiple styles flag, single style and per-character styles.
 */
StyledText Document::MarginStyledText(int line) const {
        LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
        return StyledText(pla->Length(line), pla->Text(line),
                pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
}
1925
1926 void Document::MarginSetText(int line, const char *text) {
1927         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1928         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1929         NotifyModified(mh);
1930 }
1931
1932 void Document::MarginSetStyle(int line, int style) {
1933         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1934         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1935 }
1936
1937 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1938         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1939         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1940 }
1941
1942 void Document::MarginClearAll() {
1943         int maxEditorLine = LinesTotal();
1944         for (int l=0; l<maxEditorLine; l++)
1945                 MarginSetText(l, 0);
1946         // Free remaining data
1947         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1948 }
1949
/**
 * Build a StyledText from the annotation data stored for a line: its length,
 * text, multiple styles flag, single style and per-character styles.
 */
StyledText Document::AnnotationStyledText(int line) const {
        LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
        return StyledText(pla->Length(line), pla->Text(line),
                pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
}
1955
1956 void Document::AnnotationSetText(int line, const char *text) {
1957         if (line >= 0 && line < LinesTotal()) {
1958                 const int linesBefore = AnnotationLines(line);
1959                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1960                 const int linesAfter = AnnotationLines(line);
1961                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1962                 mh.annotationLinesAdded = linesAfter - linesBefore;
1963                 NotifyModified(mh);
1964         }
1965 }
1966
1967 void Document::AnnotationSetStyle(int line, int style) {
1968         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1969         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1970         NotifyModified(mh);
1971 }
1972
1973 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1974         if (line >= 0 && line < LinesTotal()) {
1975                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1976         }
1977 }
1978
/**
 * Return the number of annotation lines recorded for a line, as reported by
 * the ldAnnotation per-line data.
 */
int Document::AnnotationLines(int line) const {
        return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
}
1982
1983 void Document::AnnotationClearAll() {
1984         int maxEditorLine = LinesTotal();
1985         for (int l=0; l<maxEditorLine; l++)
1986                 AnnotationSetText(l, 0);
1987         // Free remaining data
1988         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1989 }
1990
1991 void Document::IncrementStyleClock() {
1992         styleClock = (styleClock + 1) % 0x100000;
1993 }
1994
1995 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1996         if (decorations.FillRange(position, value, fillLength)) {
1997                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1998                                                         position, fillLength);
1999                 NotifyModified(mh);
2000         }
2001 }
2002
2003 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2004         WatcherWithUserData wwud(watcher, userData);
2005         std::vector<WatcherWithUserData>::iterator it =
2006                 std::find(watchers.begin(), watchers.end(), wwud);
2007         if (it != watchers.end())
2008                 return false;
2009         watchers.push_back(wwud);
2010         return true;
2011 }
2012
2013 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2014         std::vector<WatcherWithUserData>::iterator it =
2015                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2016         if (it != watchers.end()) {
2017                 watchers.erase(it);
2018                 return true;
2019         }
2020         return false;
2021 }
2022
2023 void Document::NotifyModifyAttempt() {
2024         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2025                 it->watcher->NotifyModifyAttempt(this, it->userData);
2026         }
2027 }
2028
2029 void Document::NotifySavePoint(bool atSavePoint) {
2030         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2031                 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2032         }
2033 }
2034
2035 void Document::NotifyModified(DocModification mh) {
2036         if (mh.modificationType & SC_MOD_INSERTTEXT) {
2037                 decorations.InsertSpace(mh.position, mh.length);
2038         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2039                 decorations.DeleteRange(mh.position, mh.length);
2040         }
2041         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2042                 it->watcher->NotifyModified(this, mh, it->userData);
2043         }
2044 }
2045
2046 bool Document::IsWordPartSeparator(char ch) const {
2047         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2048 }
2049
// Move backwards from pos to the start of the preceding word part.
// Any run of word part separators immediately before pos is skipped first,
// then a run of characters of the same kind (lower case, upper case, digits,
// punctuation, white space or non-ASCII) is traversed.
// Returns the resulting position; a pos that is not positive is returned unchanged.
int Document::WordPartLeft(int pos) {
        if (pos > 0) {
                --pos;
                char startChar = cb.CharAt(pos);
                // Step back over separators directly before the starting position
                if (IsWordPartSeparator(startChar)) {
                        while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
                                --pos;
                        }
                }
                if (pos > 0) {
                        // startChar decides which kind of run is traversed
                        startChar = cb.CharAt(pos);
                        --pos;
                        // In each branch the loop stops either at position 0 or on the first
                        // character that does not belong to the run; in the latter case pos
                        // is moved forward again so that it ends on the first character of the run.
                        if (IsLowerCase(startChar)) {
                                while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
                                        --pos;
                                // An upper case letter directly before the lower case run is kept as its start
                                if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsUpperCase(startChar)) {
                                while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
                                        --pos;
                                if (!IsUpperCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsADigit(startChar)) {
                                while (pos > 0 && IsADigit(cb.CharAt(pos)))
                                        --pos;
                                if (!IsADigit(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsPunctuation(startChar)) {
                                while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
                                        --pos;
                                if (!IsPunctuation(cb.CharAt(pos)))
                                        ++pos;
                        } else if (isspacechar(startChar)) {
                                while (pos > 0 && isspacechar(cb.CharAt(pos)))
                                        --pos;
                                if (!isspacechar(cb.CharAt(pos)))
                                        ++pos;
                        } else if (!IsASCII(startChar)) {
                                while (pos > 0 && !IsASCII(cb.CharAt(pos)))
                                        --pos;
                                if (IsASCII(cb.CharAt(pos)))
                                        ++pos;
                        } else {
                                // No recognised kind: undo the step back taken above
                                ++pos;
                        }
                }
        }
        return pos;
}
2099
// Move forwards from pos to the end of the word part that starts there.
// Any run of word part separators at pos is skipped first, then a run of
// characters of the same kind as the character reached is traversed.
// The scan never advances beyond Length(). Returns the resulting position.
int Document::WordPartRight(int pos) {
        char startChar = cb.CharAt(pos);
        int length = Length();
        // Skip separators at the starting position and re-read the deciding character
        if (IsWordPartSeparator(startChar)) {
                while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
                        ++pos;
                startChar = cb.CharAt(pos);
        }
        if (!IsASCII(startChar)) {
                while (pos < length && !IsASCII(cb.CharAt(pos)))
                        ++pos;
        } else if (IsLowerCase(startChar)) {
                while (pos < length && IsLowerCase(cb.CharAt(pos)))
                        ++pos;
        } else if (IsUpperCase(startChar)) {
                if (IsLowerCase(cb.CharAt(pos + 1))) {
                        // One capital followed by lower case: consume the capital and the lower case run
                        ++pos;
                        while (pos < length && IsLowerCase(cb.CharAt(pos)))
                                ++pos;
                } else {
                        // Run of capitals
                        while (pos < length && IsUpperCase(cb.CharAt(pos)))
                                ++pos;
                }
                // When a run of capitals is followed by lower case, step back so the
                // last capital is left to start the following part
                if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
                        --pos;
        } else if (IsADigit(startChar)) {
                while (pos < length && IsADigit(cb.CharAt(pos)))
                        ++pos;
        } else if (IsPunctuation(startChar)) {
                while (pos < length && IsPunctuation(cb.CharAt(pos)))
                        ++pos;
        } else if (isspacechar(startChar)) {
                while (pos < length && isspacechar(cb.CharAt(pos)))
                        ++pos;
        } else {
                // No recognised kind: advance by a single position
                ++pos;
        }
        return pos;
}
2139
// True for the line feed and carriage return characters.
bool IsLineEndChar(char c) {
        switch (c) {
        case '\n':
        case '\r':
                return true;
        default:
                return false;
        }
}
2143
2144 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2145         int sStart = cb.StyleAt(pos);
2146         if (delta < 0) {
2147                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2148                         pos--;
2149                 pos++;
2150         } else {
2151                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2152                         pos++;
2153         }
2154         return pos;
2155 }
2156
// Return the partner of a brace character: an opening brace gives the matching
// closing brace and a closing brace gives the matching opening brace.
// Any other character gives '\0'.
static char BraceOpposite(char ch) {
        // Partners sit next to each other so flipping the lowest bit of the index swaps them
        static const char bracePairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
        const unsigned int braceCount = sizeof(bracePairs) / sizeof(bracePairs[0]);
        for (unsigned int i = 0; i < braceCount; i++) {
                if (bracePairs[i] == ch)
                        return bracePairs[i ^ 1u];
        }
        return '\0';
}
2179
2180 // TODO: should be able to extend styled region to find matching brace
2181 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2182         char chBrace = CharAt(position);
2183         char chSeek = BraceOpposite(chBrace);
2184         if (chSeek == '\0')
2185                 return - 1;
2186         char styBrace = static_cast<char>(StyleAt(position));
2187         int direction = -1;
2188         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2189                 direction = 1;
2190         int depth = 1;
2191         position = NextPosition(position, direction);
2192         while ((position >= 0) && (position < Length())) {
2193                 char chAtPos = CharAt(position);
2194                 char styAtPos = static_cast<char>(StyleAt(position));
2195                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2196                         if (chAtPos == chBrace)
2197                                 depth++;
2198                         if (chAtPos == chSeek)
2199                                 depth--;
2200                         if (depth == 0)
2201                                 return position;
2202                 }
2203                 int positionBeforeMove = position;
2204                 position = NextPosition(position, direction);
2205                 if (position == positionBeforeMove)
2206                         break;
2207         }
2208         return - 1;
2209 }
2210
/**
 * Implementation of RegexSearchBase for the default built-in regular expression engine.
 * When CXX11_REGEX is defined, FindText can also delegate to the C++11 <regex>
 * implementation, selected with the SCFIND_CXX11REGEX flag.
 */
class BuiltinRegex : public RegexSearchBase {
public:
        // The character class table is passed on to the RESearch engine
        explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}

        virtual ~BuiltinRegex() {
        }

        // Search doc between minPos and maxPos for the regular expression s.
        // The word and wordStart arguments are left unnamed, and so unused, by the definition.
        virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
                        bool caseSensitive, bool word, bool wordStart, int flags,
                        int *length);

        virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);

private:
        // Regular expression engine; also receives the match positions of C++11 regex searches
        RESearch search;
        // NOTE(review): presumably holds the text returned by SubstituteByPosition - confirm against its definition
        std::string substituted;
};
2231
2232 namespace {
2233
2234 /**
2235 * RESearchRange keeps track of search range.
2236 */
2237 class RESearchRange {
2238 public:
2239         const Document *doc;
2240         int increment;
2241         int startPos;
2242         int endPos;
2243         int lineRangeStart;
2244         int lineRangeEnd;
2245         int lineRangeBreak;
2246         RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2247                 increment = (minPos <= maxPos) ? 1 : -1;
2248
2249                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2250                 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2251                 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2252
2253                 lineRangeStart = doc->LineFromPosition(startPos);
2254                 lineRangeEnd = doc->LineFromPosition(endPos);
2255                 if ((increment == 1) &&
2256                         (startPos >= doc->LineEnd(lineRangeStart)) &&
2257                         (lineRangeStart < lineRangeEnd)) {
2258                         // the start position is at end of line or between line end characters.
2259                         lineRangeStart++;
2260                         startPos = doc->LineStart(lineRangeStart);
2261                 } else if ((increment == -1) &&
2262                         (startPos <= doc->LineStart(lineRangeStart)) &&
2263                         (lineRangeStart > lineRangeEnd)) {
2264                         // the start position is at beginning of line.
2265                         lineRangeStart--;
2266                         startPos = doc->LineEnd(lineRangeStart);
2267                 }
2268                 lineRangeBreak = lineRangeEnd + increment;
2269         }
2270         Range LineRange(int line) const {
2271                 Range range(doc->LineStart(line), doc->LineEnd(line));
2272                 if (increment == 1) {
2273                         if (line == lineRangeStart)
2274                                 range.start = startPos;
2275                         if (line == lineRangeEnd)
2276                                 range.end = endPos;
2277                 } else {
2278                         if (line == lineRangeEnd)
2279                                 range.start = endPos;
2280                         if (line == lineRangeStart)
2281                                 range.end = startPos;
2282                 }
2283                 return range;
2284         }
2285 };
2286
2287 // Define a way for the Regular Expression code to access the document
2288 class DocumentIndexer : public CharacterIndexer {
2289         Document *pdoc;
2290         int end;
2291 public:
2292         DocumentIndexer(Document *pdoc_, int end_) :
2293                 pdoc(pdoc_), end(end_) {
2294         }
2295
2296         virtual ~DocumentIndexer() {
2297         }
2298
2299         virtual char CharAt(int index) {
2300                 if (index < 0 || index >= end)
2301                         return 0;
2302                 else
2303                         return pdoc->CharAt(index);
2304         }
2305 };
2306
2307 #ifdef CXX11_REGEX
2308
2309 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2310 public:
2311         const Document *doc;
2312         Position position;
2313         ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2314         }
2315         ByteIterator(const ByteIterator &other) {
2316                 doc = other.doc;
2317                 position = other.position;
2318         }
2319         ByteIterator &operator=(const ByteIterator &other) {
2320                 if (this != &other) {
2321                         doc = other.doc;
2322                         position = other.position;
2323                 }
2324                 return *this;
2325         }
2326         char operator*() const {
2327                 return doc->CharAt(position);
2328         }
2329         ByteIterator &operator++() {
2330                 position++;
2331                 return *this;
2332         }
2333         ByteIterator operator++(int) {
2334                 ByteIterator retVal(*this);
2335                 position++;
2336                 return retVal;
2337         }
2338         ByteIterator &operator--() {
2339                 position--;
2340                 return *this;
2341         }
2342         bool operator==(const ByteIterator &other) const {
2343                 return doc == other.doc && position == other.position;
2344         }
2345         bool operator!=(const ByteIterator &other) const {
2346                 return doc != other.doc || position != other.position;
2347         }
2348         int Pos() const {
2349                 return position;
2350         }
2351         int PosRoundUp() const {
2352                 return position;
2353         }
2354 };
2355
// On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
// Would be better to use sizeof(wchar_t) or similar to differentiate
// but easier for now to hard-code platforms.
// C++11 has char16_t and char32_t but neither Clang nor Visual C++
// appear to allow specializing basic_regex over these.

// WCHAR_T_IS_16 is 1 when _WIN32 is defined and 0 otherwise. It selects
// which UTF8Iterator implementation is compiled.
#ifdef _WIN32
#define WCHAR_T_IS_16 1
#else
#define WCHAR_T_IS_16 0
#endif
2367
2368 #if WCHAR_T_IS_16
2369
2370 // On Windows, report non-BMP characters as 2 separate surrogates as that
2371 // matches wregex since it is based on wchar_t.
2372 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2373         // These 3 fields determine the iterator position and are used for comparisons
2374         const Document *doc;
2375         Position position;
2376         size_t characterIndex;
2377         // Remaining fields are derived from the determining fields so are excluded in comparisons
2378         unsigned int lenBytes;
2379         size_t lenCharacters;
2380         wchar_t buffered[2];
2381 public:
2382         UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2383                 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2384                 buffered[0] = 0;
2385                 buffered[1] = 0;
2386                 if (doc) {
2387                         ReadCharacter();
2388                 }
2389         }
2390         UTF8Iterator(const UTF8Iterator &other) {
2391                 doc = other.doc;
2392                 position = other.position;
2393                 characterIndex = other.characterIndex;
2394                 lenBytes = other.lenBytes;
2395                 lenCharacters = other.lenCharacters;
2396                 buffered[0] = other.buffered[0];
2397                 buffered[1] = other.buffered[1];
2398         }
2399         UTF8Iterator &operator=(const UTF8Iterator &other) {
2400                 if (this != &other) {
2401                         doc = other.doc;
2402                         position = other.position;
2403                         characterIndex = other.characterIndex;
2404                         lenBytes = other.lenBytes;
2405                         lenCharacters = other.lenCharacters;
2406                         buffered[0] = other.buffered[0];
2407                         buffered[1] = other.buffered[1];
2408                 }
2409                 return *this;
2410         }
2411         wchar_t operator*() const {
2412                 assert(lenCharacters != 0);
2413                 return buffered[characterIndex];
2414         }
2415         UTF8Iterator &operator++() {
2416                 if ((characterIndex + 1) < (lenCharacters)) {
2417                         characterIndex++;
2418                 } else {
2419                         position += lenBytes;
2420                         ReadCharacter();
2421                         characterIndex = 0;
2422                 }
2423                 return *this;
2424         }
2425         UTF8Iterator operator++(int) {
2426                 UTF8Iterator retVal(*this);
2427                 if ((characterIndex + 1) < (lenCharacters)) {
2428                         characterIndex++;
2429                 } else {
2430                         position += lenBytes;
2431                         ReadCharacter();
2432                         characterIndex = 0;
2433                 }
2434                 return retVal;
2435         }
2436         UTF8Iterator &operator--() {
2437                 if (characterIndex) {
2438                         characterIndex--;
2439                 } else {
2440                         position = doc->NextPosition(position, -1);
2441                         ReadCharacter();
2442                         characterIndex = lenCharacters - 1;
2443                 }
2444                 return *this;
2445         }
2446         bool operator==(const UTF8Iterator &other) const {
2447                 // Only test the determining fields, not the character widths and values derived from this
2448                 return doc == other.doc &&
2449                         position == other.position &&
2450                         characterIndex == other.characterIndex;
2451         }
2452         bool operator!=(const UTF8Iterator &other) const {
2453                 // Only test the determining fields, not the character widths and values derived from this
2454                 return doc != other.doc ||
2455                         position != other.position ||
2456                         characterIndex != other.characterIndex;
2457         }
2458         int Pos() const {
2459                 return position;
2460         }
2461         int PosRoundUp() const {
2462                 if (characterIndex)
2463                         return position + lenBytes;     // Force to end of character
2464                 else
2465                         return position;
2466         }
2467 private:
2468         void ReadCharacter() {
2469                 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2470                 lenBytes = charExtracted.widthBytes;
2471                 if (charExtracted.character == unicodeReplacementChar) {
2472                         lenCharacters = 1;
2473                         buffered[0] = static_cast<wchar_t>(charExtracted.character);
2474                 } else {
2475                         lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2476                 }
2477         }
2478 };
2479
2480 #else
2481
2482 // On Unix, report non-BMP characters as single characters
2483
2484 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2485         const Document *doc;
2486         Position position;
2487 public:
2488         UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2489         }
2490         UTF8Iterator(const UTF8Iterator &other) {
2491                 doc = other.doc;
2492                 position = other.position;
2493         }
2494         UTF8Iterator &operator=(const UTF8Iterator &other) {
2495                 if (this != &other) {
2496                         doc = other.doc;
2497                         position = other.position;
2498                 }
2499                 return *this;
2500         }
2501         wchar_t operator*() const {
2502                 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2503                 return charExtracted.character;
2504         }
2505         UTF8Iterator &operator++() {
2506                 position = doc->NextPosition(position, 1);
2507                 return *this;
2508         }
2509         UTF8Iterator operator++(int) {
2510                 UTF8Iterator retVal(*this);
2511                 position = doc->NextPosition(position, 1);
2512                 return retVal;
2513         }
2514         UTF8Iterator &operator--() {
2515                 position = doc->NextPosition(position, -1);
2516                 return *this;
2517         }
2518         bool operator==(const UTF8Iterator &other) const {
2519                 return doc == other.doc && position == other.position;
2520         }
2521         bool operator!=(const UTF8Iterator &other) const {
2522                 return doc != other.doc || position != other.position;
2523         }
2524         int Pos() const {
2525                 return position;
2526         }
2527         int PosRoundUp() const {
2528                 return position;
2529         }
2530 };
2531
2532 #endif
2533
2534 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2535         std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2536         if (!doc->IsLineStartPosition(startPos))
2537                 flagsMatch |= std::regex_constants::match_not_bol;
2538         if (!doc->IsLineEndPosition(endPos))
2539                 flagsMatch |= std::regex_constants::match_not_eol;
2540         return flagsMatch;
2541 }
2542
// Search the lines of the range described by resr, in the direction given by
// resr.increment, for the first line on which regexp matches.
// For a backwards search the last match on that line is chosen.
// On success the start, end and text of every sub-match are stored in
// search.bopat, search.eopat and search.pat.
// Returns true when a match was found.
template<typename Iterator, typename Regex>
bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
        bool matched = false;
        std::match_results<Iterator> match;

        // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
        // If they didn't then the line by line iteration could be removed for the forwards
        // case and replaced with the following 4 lines:
        //      Iterator uiStart(doc, startPos);
        //      Iterator uiEnd(doc, endPos);
        //      flagsMatch = MatchFlags(doc, startPos, endPos);
        //      matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);

        // Line by line.
        for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
                const Range lineRange = resr.LineRange(line);
                Iterator itStart(doc, lineRange.start);
                Iterator itEnd(doc, lineRange.end);
                std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
                matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
                // Check for the last match on this line.
                if (matched) {
                        if (resr.increment == -1) {
                                // Keep searching from the end of the current match until no further
                                // match is found; match then holds the last one on the line.
                                while (matched) {
                                        Iterator itNext(doc, match[0].second.PosRoundUp());
                                        flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
                                        std::match_results<Iterator> matchNext;
                                        matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
                                        if (matched) {
                                                // The current match is tested here, before it is replaced by matchNext
                                                if (match[0].first == match[0].second) {
                                                        // Empty match means failure so exit
                                                        return false;
                                                }
                                                match = matchNext;
                                        }
                                }
                                // The loop ends with matched false although match holds a valid result
                                matched = true;
                        }
                        break;
                }
        }
        if (matched) {
                // Copy positions and matched text of each sub-match into the RESearch object
                for (size_t co = 0; co < match.size(); co++) {
                        search.bopat[co] = match[co].first.Pos();
                        search.eopat[co] = match[co].second.PosRoundUp();
                        size_t lenMatch = search.eopat[co] - search.bopat[co];
                        search.pat[co].resize(lenMatch);
                        for (size_t iPos = 0; iPos < lenMatch; iPos++) {
                                search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
                        }
                }
        }
        return matched;
}
2597
// Search doc between minPos and maxPos for the ECMAScript regular expression s
// using the C++11 <regex> library.
// For UTF-8 documents the pattern is converted to wide characters and matched
// with std::wregex over UTF8Iterator; otherwise std::regex is matched over bytes.
// Match details are stored in search. On success the start of the match is
// returned and *length is set to its length in bytes; -1 is returned when
// there is no match or an exception other than std::regex_error occurs.
// Throws RegexError when the expression is rejected with std::regex_error.
long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
        bool caseSensitive, int *length, RESearch &search) {
        const RESearchRange resr(doc, minPos, maxPos);
        try {
                //ElapsedTime et;
                std::regex::flag_type flagsRe = std::regex::ECMAScript;
                // Flags that appear to have no effect:
                // | std::regex::collate | std::regex::extended;
                if (!caseSensitive)
                        flagsRe = flagsRe | std::regex::icase;

                // Clear the RESearch so can fill in matches
                search.Clear();

                bool matched = false;
                if (SC_CP_UTF8 == doc->dbcsCodePage) {
                        // Convert the UTF-8 pattern to wide characters; lenS + 1 elements leave room for the terminator
                        unsigned int lenS = static_cast<unsigned int>(strlen(s));
                        std::vector<wchar_t> ws(lenS + 1);
#if WCHAR_T_IS_16
                        size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
#else
                        size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
#endif
                        ws[outLen] = 0;
                        std::wregex regexp;
#if defined(__APPLE__)
                        // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
                        // is one byte not one character.
                        // However, on OS X this makes wregex act as Unicode
                        std::locale localeU("en_US.UTF-8");
                        regexp.imbue(localeU);
#endif
                        regexp.assign(&ws[0], flagsRe);
                        matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);

                } else {
                        std::regex regexp;
                        regexp.assign(s, flagsRe);
                        matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
                }

                int posMatch = -1;
                if (matched) {
                        posMatch = search.bopat[0];
                        *length = search.eopat[0] - search.bopat[0];
                }
                // Example - search in doc/ScintillaHistory.html for
                // [[:upper:]]eta[[:space:]]
                // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
                //double durSearch = et.Duration(true);
                //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
                return posMatch;
        } catch (std::regex_error &) {
                // Failed to create regular expression
                throw RegexError();
        } catch (...) {
                // Failed in some other way
                return -1;
        }
}
2658
2659 #endif
2660
2661 }
2662
2663 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2664                         bool caseSensitive, bool, bool, int flags,
2665                         int *length) {
2666
2667 #ifdef CXX11_REGEX
2668         if (flags & SCFIND_CXX11REGEX) {
2669                         return Cxx11RegexFindText(doc, minPos, maxPos, s,
2670                         caseSensitive, length, search);
2671         }
2672 #endif
2673
2674         const RESearchRange resr(doc, minPos, maxPos);
2675
2676         const bool posix = (flags & SCFIND_POSIX) != 0;
2677
2678         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2679         if (errmsg) {
2680                 return -1;
2681         }
2682         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2683         // Replace first '.' with '-' in each property file variable reference:
2684         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2685         //     Replace: $(\1-\2)
2686         int pos = -1;
2687         int lenRet = 0;
2688         const char searchEnd = s[*length - 1];
2689         const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2690         for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2691                 int startOfLine = doc->LineStart(line);
2692                 int endOfLine = doc->LineEnd(line);
2693                 if (resr.increment == 1) {
2694                         if (line == resr.lineRangeStart) {
2695                                 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2696                                         continue;       // Can't match start of line if start position after start of line
2697                                 startOfLine = resr.startPos;
2698                         }
2699                         if (line == resr.lineRangeEnd) {
2700                                 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2701                                         continue;       // Can't match end of line if end position before end of line
2702                                 endOfLine = resr.endPos;
2703                         }
2704                 } else {
2705                         if (line == resr.lineRangeEnd) {
2706                                 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2707                                         continue;       // Can't match start of line if end position after start of line
2708                                 startOfLine = resr.endPos;
2709                         }
2710                         if (line == resr.lineRangeStart) {
2711                                 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2712                                         continue;       // Can't match end of line if start position before end of line
2713                                 endOfLine = resr.startPos;
2714                         }
2715                 }
2716
2717                 DocumentIndexer di(doc, endOfLine);
2718                 int success = search.Execute(di, startOfLine, endOfLine);
2719                 if (success) {
2720                         pos = search.bopat[0];
2721                         // Ensure only whole characters selected
2722                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2723                         lenRet = search.eopat[0] - search.bopat[0];
2724                         // There can be only one start of a line, so no need to look for last match in line
2725                         if ((resr.increment == -1) && (s[0] != '^')) {
2726                                 // Check for the last match on this line.
2727                                 int repetitions = 1000; // Break out of infinite loop
2728                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2729                                         success = search.Execute(di, pos+1, endOfLine);
2730                                         if (success) {
2731                                                 if (search.eopat[0] <= minPos) {
2732                                                         pos = search.bopat[0];
2733                                                         lenRet = search.eopat[0] - search.bopat[0];
2734                                                 } else {
2735                                                         success = 0;
2736                                                 }
2737                                         }
2738                                 }
2739                         }
2740                         break;
2741                 }
2742         }
2743         *length = lenRet;
2744         return pos;
2745 }
2746
2747 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2748         substituted.clear();
2749         DocumentIndexer di(doc, doc->Length());
2750         search.GrabMatches(di);
2751         for (int j = 0; j < *length; j++) {
2752                 if (text[j] == '\\') {
2753                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2754                                 unsigned int patNum = text[j + 1] - '0';
2755                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2756                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
2757                                         substituted.append(search.pat[patNum].c_str(), len);
2758                                 j++;
2759                         } else {
2760                                 j++;
2761                                 switch (text[j]) {
2762                                 case 'a':
2763                                         substituted.push_back('\a');
2764                                         break;
2765                                 case 'b':
2766                                         substituted.push_back('\b');
2767                                         break;
2768                                 case 'f':
2769                                         substituted.push_back('\f');
2770                                         break;
2771                                 case 'n':
2772                                         substituted.push_back('\n');
2773                                         break;
2774                                 case 'r':
2775                                         substituted.push_back('\r');
2776                                         break;
2777                                 case 't':
2778                                         substituted.push_back('\t');
2779                                         break;
2780                                 case 'v':
2781                                         substituted.push_back('\v');
2782                                         break;
2783                                 case '\\':
2784                                         substituted.push_back('\\');
2785                                         break;
2786                                 default:
2787                                         substituted.push_back('\\');
2788                                         j--;
2789                                 }
2790                         }
2791                 } else {
2792                         substituted.push_back(text[j]);
2793                 }
2794         }
2795         *length = static_cast<int>(substituted.length());
2796         return substituted.c_str();
2797 }
2798
2799 #ifndef SCI_OWNREGEX
2800
2801 #ifdef SCI_NAMESPACE
2802
2803 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2804         return new BuiltinRegex(charClassTable);
2805 }
2806
2807 #else
2808
2809 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2810         return new BuiltinRegex(charClassTable);
2811 }
2812
2813 #endif
2814
2815 #endif