scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <assert.h>
  12 #include <ctype.h>
  13
  14 #include <stdexcept>
  15 #include <string>
  16 #include <vector>
  17 #include <algorithm>
  18
  19 #ifdef CXX11_REGEX
  20 #include <regex>
  21 #endif
  22
  23 #include "Platform.h"
  24
  25 #include "ILexer.h"
  26 #include "Scintilla.h"
  27
  28 #include "CharacterSet.h"
  29 #include "SplitVector.h"
  30 #include "Partitioning.h"
  31 #include "RunStyles.h"
  32 #include "CellBuffer.h"
  33 #include "PerLine.h"
  34 #include "CharClassify.h"
  35 #include "Decoration.h"
  36 #include "CaseFolder.h"
  37 #include "Document.h"
  38 #include "RESearch.h"
  39 #include "UniConversion.h"
  40 #include "UnicodeFromUTF8.h"
  41
  42 #ifdef SCI_NAMESPACE
  43 using namespace Scintilla;
  44 #endif
  45
  46 static inline bool IsPunctuation(char ch) {
  47         return IsASCII(ch) && ispunct(ch);
  48 }
  49
  50 void LexInterface::Colourise(int start, int end) {
  51         if (pdoc && instance && !performingStyle) {
  52                 // Protect against reentrance, which may occur, for example, when
  53                 // fold points are discovered while performing styling and the folding
  54                 // code looks for child lines which may trigger styling.
  55                 performingStyle = true;
  56
  57                 int lengthDoc = pdoc->Length();
  58                 if (end == -1)
  59                         end = lengthDoc;
  60                 int len = end - start;
  61
  62                 PLATFORM_ASSERT(len >= 0);
  63                 PLATFORM_ASSERT(start + len <= lengthDoc);
  64
  65                 int styleStart = 0;
  66                 if (start > 0)
  67                         styleStart = pdoc->StyleAt(start - 1);
  68
  69                 if (len > 0) {
  70                         instance->Lex(start, len, styleStart, pdoc);
  71                         instance->Fold(start, len, styleStart, pdoc);
  72                 }
  73
  74                 performingStyle = false;
  75         }
  76 }
  77
  78 int LexInterface::LineEndTypesSupported() {
  79         if (instance) {
  80                 int interfaceVersion = instance->Version();
  81                 if (interfaceVersion >= lvSubStyles) {
  82                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  83                         return ssinstance->LineEndTypesSupported();
  84                 }
  85         }
  86         return 0;
  87 }
  88
  89 Document::Document() {
  90         refCount = 0;
  91         pcf = NULL;
  92 #ifdef _WIN32
  93         eolMode = SC_EOL_CRLF;
  94 #else
  95         eolMode = SC_EOL_LF;
  96 #endif
  97         dbcsCodePage = 0;
  98         lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
  99         endStyled = 0;
 100         styleClock = 0;
 101         enteredModification = 0;
 102         enteredStyling = 0;
 103         enteredReadOnlyCount = 0;
 104         insertionSet = false;
 105         tabInChars = 8;
 106         indentInChars = 0;
 107         actualIndentInChars = 8;
 108         useTabs = true;
 109         tabIndents = true;
 110         backspaceUnindents = false;
 111
 112         matchesValid = false;
 113         regex = 0;
 114
 115         UTF8BytesOfLeadInitialise();
 116
 117         perLineData[ldMarkers] = new LineMarkers();
 118         perLineData[ldLevels] = new LineLevels();
 119         perLineData[ldState] = new LineState();
 120         perLineData[ldMargin] = new LineAnnotation();
 121         perLineData[ldAnnotation] = new LineAnnotation();
 122
 123         cb.SetPerLine(this);
 124
 125         pli = 0;
 126 }
 127
 128 Document::~Document() {
 129         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 130                 it->watcher->NotifyDeleted(this, it->userData);
 131         }
 132         for (int j=0; j<ldSize; j++) {
 133                 delete perLineData[j];
 134                 perLineData[j] = 0;
 135         }
 136         delete regex;
 137         regex = 0;
 138         delete pli;
 139         pli = 0;
 140         delete pcf;
 141         pcf = 0;
 142 }
 143
 144 void Document::Init() {
 145         for (int j=0; j<ldSize; j++) {
 146                 if (perLineData[j])
 147                         perLineData[j]->Init();
 148         }
 149 }
 150
 151 int Document::LineEndTypesSupported() const {
 152         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 153                 return pli->LineEndTypesSupported();
 154         else
 155                 return 0;
 156 }
 157
 158 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 159         if (dbcsCodePage != dbcsCodePage_) {
 160                 dbcsCodePage = dbcsCodePage_;
 161                 SetCaseFolder(NULL);
 162                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 163                 return true;
 164         } else {
 165                 return false;
 166         }
 167 }
 168
 169 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 170         if (lineEndBitSet != lineEndBitSet_) {
 171                 lineEndBitSet = lineEndBitSet_;
 172                 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 173                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 174                         ModifiedAt(0);
 175                         cb.SetLineEndTypes(lineEndBitSetActive);
 176                         return true;
 177                 } else {
 178                         return false;
 179                 }
 180         } else {
 181                 return false;
 182         }
 183 }
 184
 185 void Document::InsertLine(int line) {
 186         for (int j=0; j<ldSize; j++) {
 187                 if (perLineData[j])
 188                         perLineData[j]->InsertLine(line);
 189         }
 190 }
 191
 192 void Document::RemoveLine(int line) {
 193         for (int j=0; j<ldSize; j++) {
 194                 if (perLineData[j])
 195                         perLineData[j]->RemoveLine(line);
 196         }
 197 }
 198
 199 // Increase reference count and return its previous value.
 200 int Document::AddRef() {
 201         return refCount++;
 202 }
 203
 204 // Decrease reference count and return its previous value.
 205 // Delete the document if reference count reaches zero.
 206 int SCI_METHOD Document::Release() {
 207         int curRefCount = --refCount;
 208         if (curRefCount == 0)
 209                 delete this;
 210         return curRefCount;
 211 }
 212
 213 void Document::SetSavePoint() {
 214         cb.SetSavePoint();
 215         NotifySavePoint(true);
 216 }
 217
// Undo all the steps reported by cb.TentativeSteps() and then call cb.TentativeCommit().
// Does nothing unless TentativeActive() is true.
// For each step a "before" notification is sent, the step is undone in the cell
// buffer, and an "after" notification describing the performed change is sent.
// A save point notification follows if the save point state changed.
void Document::TentativeUndo() {
        if (!TentativeActive())
                return;
        CheckReadOnly();
        // Only act when not already inside a modification; the counter is raised for the duration
        if (enteredModification == 0) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        // Remember the save point state to detect a change at the end
                        bool startSavePoint = cb.IsSavePoint();
                        // Set once any step changes the number of lines
                        bool multiLine = false;
                        int steps = cb.TentativeSteps();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetUndoStep();
                                // "Before" notification: undoing a removal inserts text,
                                // a container action is passed on with its token,
                                // anything else is announced as a deletion.
                                if (action.at == removeAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
                                } else if (action.at == containerAction) {
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
                                }
                                cb.PerformUndoStep();
                                // Container actions do not touch the text so no ModifiedAt call is made for them
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                }

                                // Build the flags for the "after" notification
                                int modFlags = SC_PERFORMED_UNDO;
                                // With undo, an insertion action becomes a deletion notification
                                // and a removal action becomes an insertion notification
                                if (action.at == removeAction) {
                                        modFlags |= SC_MOD_INSERTTEXT;
                                } else if (action.at == insertAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step is flagged, along with whether any step changed the line count
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                                                                           linesAdded, action.data));
                        }

                        // Report a save point transition caused by the undo steps
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);

                        cb.TentativeCommit();
                }
                enteredModification--;
        }
}
 278
 279 int Document::GetMark(int line) {
 280         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 281 }
 282
 283 int Document::MarkerNext(int lineStart, int mask) const {
 284         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 285 }
 286
 287 int Document::AddMark(int line, int markerNum) {
 288         if (line >= 0 && line <= LinesTotal()) {
 289                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 290                         AddMark(line, markerNum, LinesTotal());
 291                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 292                 NotifyModified(mh);
 293                 return prev;
 294         } else {
 295                 return 0;
 296         }
 297 }
 298
 299 void Document::AddMarkSet(int line, int valueSet) {
 300         if (line < 0 || line > LinesTotal()) {
 301                 return;
 302         }
 303         unsigned int m = valueSet;
 304         for (int i = 0; m; i++, m >>= 1)
 305                 if (m & 1)
 306                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 307                                 AddMark(line, i, LinesTotal());
 308         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 309         NotifyModified(mh);
 310 }
 311
 312 void Document::DeleteMark(int line, int markerNum) {
 313         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 314         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 315         NotifyModified(mh);
 316 }
 317
 318 void Document::DeleteMarkFromHandle(int markerHandle) {
 319         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 320         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 321         mh.line = -1;
 322         NotifyModified(mh);
 323 }
 324
 325 void Document::DeleteAllMarks(int markerNum) {
 326         bool someChanges = false;
 327         for (int line = 0; line < LinesTotal(); line++) {
 328                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 329                         someChanges = true;
 330         }
 331         if (someChanges) {
 332                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 333                 mh.line = -1;
 334                 NotifyModified(mh);
 335         }
 336 }
 337
 338 int Document::LineFromHandle(int markerHandle) {
 339         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 340 }
 341
 342 int SCI_METHOD Document::LineStart(int line) const {
 343         return cb.LineStart(line);
 344 }
 345
 346 bool Document::IsLineStartPosition(int position) const {
 347         return LineStart(LineFromPosition(position)) == position;
 348 }
 349
 350 int SCI_METHOD Document::LineEnd(int line) const {
 351         if (line >= LinesTotal() - 1) {
 352                 return LineStart(line + 1);
 353         } else {
 354                 int position = LineStart(line + 1);
 355                 if (SC_CP_UTF8 == dbcsCodePage) {
 356                         unsigned char bytes[] = {
 357                                 static_cast<unsigned char>(cb.CharAt(position-3)),
 358                                 static_cast<unsigned char>(cb.CharAt(position-2)),
 359                                 static_cast<unsigned char>(cb.CharAt(position-1)),
 360                         };
 361                         if (UTF8IsSeparator(bytes)) {
 362                                 return position - UTF8SeparatorLength;
 363                         }
 364                         if (UTF8IsNEL(bytes+1)) {
 365                                 return position - UTF8NELLength;
 366                         }
 367                 }
 368                 position--; // Back over CR or LF
 369                 // When line terminator is CR+LF, may need to go back one more
 370                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 371                         position--;
 372                 }
 373                 return position;
 374         }
 375 }
 376
 377 void SCI_METHOD Document::SetErrorStatus(int status) {
 378         // Tell the watchers an error has occurred.
 379         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 380                 it->watcher->NotifyErrorOccurred(this, it->userData, status);
 381         }
 382 }
 383
 384 int SCI_METHOD Document::LineFromPosition(int pos) const {
 385         return cb.LineFromPosition(pos);
 386 }
 387
 388 int Document::LineEndPosition(int position) const {
 389         return LineEnd(LineFromPosition(position));
 390 }
 391
 392 bool Document::IsLineEndPosition(int position) const {
 393         return LineEnd(LineFromPosition(position)) == position;
 394 }
 395
 396 bool Document::IsPositionInLineEnd(int position) const {
 397         return position >= LineEnd(LineFromPosition(position));
 398 }
 399
 400 int Document::VCHomePosition(int position) const {
 401         int line = LineFromPosition(position);
 402         int startPosition = LineStart(line);
 403         int endLine = LineEnd(line);
 404         int startText = startPosition;
 405         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 406                 startText++;
 407         if (position == startText)
 408                 return startPosition;
 409         else
 410                 return startText;
 411 }
 412
 413 int SCI_METHOD Document::SetLevel(int line, int level) {
 414         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 415         if (prev != level) {
 416                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 417                                    LineStart(line), 0, 0, 0, line);
 418                 mh.foldLevelNow = level;
 419                 mh.foldLevelPrev = prev;
 420                 NotifyModified(mh);
 421         }
 422         return prev;
 423 }
 424
 425 int SCI_METHOD Document::GetLevel(int line) const {
 426         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 427 }
 428
 429 void Document::ClearLevels() {
 430         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 431 }
 432
 433 static bool IsSubordinate(int levelStart, int levelTry) {
 434         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 435                 return true;
 436         else
 437                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 438 }
 439
 440 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 441         if (level == -1)
 442                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 443         int maxLine = LinesTotal();
 444         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 445         int lineMaxSubord = lineParent;
 446         while (lineMaxSubord < maxLine - 1) {
 447                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 448                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 449                         break;
 450                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 451                         break;
 452                 lineMaxSubord++;
 453         }
 454         if (lineMaxSubord > lineParent) {
 455                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 456                         // Have chewed up some whitespace that belongs to a parent so seek back
 457                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 458                                 lineMaxSubord--;
 459                         }
 460                 }
 461         }
 462         return lineMaxSubord;
 463 }
 464
 465 int Document::GetFoldParent(int line) const {
 466         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 467         int lineLook = line - 1;
 468         while ((lineLook > 0) && (
 469                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 470                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 471               ) {
 472                 lineLook--;
 473         }
 474         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 475                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 476                 return lineLook;
 477         } else {
 478                 return -1;
 479         }
 480 }
 481
// Work out the fold block around `line` and store its limits in highlightDelimiter:
// beginFoldBlock, endFoldBlock, firstChangeableLineBefore and firstChangeableLineAfter.
// When no fold header can be found for the line, highlightDelimiter.Clear() is called
// and nothing else is stored.
// lastLine is combined with line to form the limit given to GetLastChild.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
        int level = GetLevel(line);
        // Limit for the GetLastChild searches: one past the later of line and lastLine
        int lookLastLine = Platform::Maximum(line, lastLine) + 1;

        // Step upwards while the current line is flagged as white space, or is a header
        // whose level number is not below that of the line after it.
        int lookLine = line;
        int lookLineLevel = level;
        int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
        while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
                ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
                lookLineLevel = GetLevel(--lookLine);
                lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
        }

        // The block starts at the line reached if it is a header, otherwise at its fold parent
        int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
        if (beginFoldBlock == -1) {
                highlightDelimiter.Clear();
                return;
        }

        int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
        int firstChangeableLineBefore = -1;
        // The block found ends before `line`: look further up for a header whose last child is `line`
        if (endFoldBlock < line) {
                lookLine = beginFoldBlock - 1;
                lookLineLevel = GetLevel(lookLine);
                lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
                        if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
                                if (GetLastChild(lookLine, -1, lookLastLine) == line) {
                                        beginFoldBlock = lookLine;
                                        endFoldBlock = line;
                                        firstChangeableLineBefore = line - 1;
                                }
                        }
                        // Stop at a base level line that is preceded by a line with a higher level number
                        if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
                                break;
                        lookLineLevel = GetLevel(--lookLine);
                        lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                }
        }
        // Not decided above: scan back from the line before `line` to the start of the block for
        // the first line that is white space or has a level number above that of `line`
        if (firstChangeableLineBefore == -1) {
                for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                        lookLine >= beginFoldBlock;
                        lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
                        if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
                                firstChangeableLineBefore = lookLine;
                                break;
                        }
                }
        }
        // Still nothing found: fall back to the line just before the block
        if (firstChangeableLineBefore == -1)
                firstChangeableLineBefore = beginFoldBlock - 1;

        // Scan forward from the line after `line` to the end of the block for the first header
        // whose level number is below that of the line following it
        int firstChangeableLineAfter = -1;
        for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                lookLine <= endFoldBlock;
                lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
                if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
                        firstChangeableLineAfter = lookLine;
                        break;
                }
        }
        // Nothing found: fall back to the line just after the block
        if (firstChangeableLineAfter == -1)
                firstChangeableLineAfter = endFoldBlock + 1;

        highlightDelimiter.beginFoldBlock = beginFoldBlock;
        highlightDelimiter.endFoldBlock = endFoldBlock;
        highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
        highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
 551
 552 int Document::ClampPositionIntoDocument(int pos) const {
 553         return Platform::Clamp(pos, 0, Length());
 554 }
 555
 556 bool Document::IsCrLf(int pos) const {
 557         if (pos < 0)
 558                 return false;
 559         if (pos >= (Length() - 1))
 560                 return false;
 561         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 562 }
 563
 564 int Document::LenChar(int pos) {
 565         if (pos < 0) {
 566                 return 1;
 567         } else if (IsCrLf(pos)) {
 568                 return 2;
 569         } else if (SC_CP_UTF8 == dbcsCodePage) {
 570                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 571                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 572                 int lengthDoc = Length();
 573                 if ((pos + widthCharBytes) > lengthDoc)
 574                         return lengthDoc - pos;
 575                 else
 576                         return widthCharBytes;
 577         } else if (dbcsCodePage) {
 578                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 579         } else {
 580                 return 1;
 581         }
 582 }
 583
 584 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 585         int trail = pos;
 586         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 587                 trail--;
 588         start = (trail > 0) ? trail-1 : trail;
 589
 590         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 591         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 592         if (widthCharBytes == 1) {
 593                 return false;
 594         } else {
 595                 int trailBytes = widthCharBytes - 1;
 596                 int len = pos - start;
 597                 if (len > trailBytes)
 598                         // pos too far from lead
 599                         return false;
 600                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 601                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 602                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 603                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 604                 if (utf8status & UTF8MaskInvalid)
 605                         return false;
 606                 end = start + widthCharBytes;
 607                 return true;
 608         }
 609 }
 610
 611 // Normalise a position so that it is not halfway through a two byte character.
 612 // This can occur in two situations -
 613 // When lines are terminated with \r\n pairs which should be treated as one character.
 614 // When displaying DBCS text such as Japanese.
 615 // If moving, move the position in the indicated direction.
 616 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
 617         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 618         // If out of range, just return minimum/maximum value.
 619         if (pos <= 0)
 620                 return 0;
 621         if (pos >= Length())
 622                 return Length();
 623
 624         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 625         if (checkLineEnd && IsCrLf(pos - 1)) {
 626                 if (moveDir > 0)
 627                         return pos + 1;
 628                 else
 629                         return pos - 1;
 630         }
 631
 632         if (dbcsCodePage) {
 633                 if (SC_CP_UTF8 == dbcsCodePage) {
 634                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 635                         // If ch is not a trail byte then pos is valid intercharacter position
 636                         if (UTF8IsTrailByte(ch)) {
 637                                 int startUTF = pos;
 638                                 int endUTF = pos;
 639                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 640                                         // ch is a trail byte within a UTF-8 character
 641                                         if (moveDir > 0)
 642                                                 pos = endUTF;
 643                                         else
 644                                                 pos = startUTF;
 645                                 }
 646                                 // Else invalid UTF-8 so return position of isolated trail byte
 647                         }
 648                 } else {
 649                         // Anchor DBCS calculations at start of line because start of line can
 650                         // not be a DBCS trail byte.
 651                         int posStartLine = LineStart(LineFromPosition(pos));
 652                         if (pos == posStartLine)
 653                                 return pos;
 654
 655                         // Step back until a non-lead-byte is found.
 656                         int posCheck = pos;
 657                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 658                                 posCheck--;
 659
 660                         // Check from known start of character.
 661                         while (posCheck < pos) {
 662                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 663                                 if (posCheck + mbsize == pos) {
 664                                         return pos;
 665                                 } else if (posCheck + mbsize > pos) {
 666                                         if (moveDir > 0) {
 667                                                 return posCheck + mbsize;
 668                                         } else {
 669                                                 return posCheck;
 670                                         }
 671                                 }
 672                                 posCheck += mbsize;
 673                         }
 674                 }
 675         }
 676
 677         return pos;
 678 }
 679
 680 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 681 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 682 // A \r\n pair is treated as two characters.
 683 int Document::NextPosition(int pos, int moveDir) const {
 684         // If out of range, just return minimum/maximum value.
 685         int increment = (moveDir > 0) ? 1 : -1;
 686         if (pos + increment <= 0)
 687                 return 0;
 688         if (pos + increment >= Length())
 689                 return Length();
 690
 691         if (dbcsCodePage) {
 692                 if (SC_CP_UTF8 == dbcsCodePage) {
 693                         if (increment == 1) {
 694                                 // Simple forward movement case so can avoid some checks
 695                                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 696                                 if (UTF8IsAscii(leadByte)) {
 697                                         // Single byte character or invalid
 698                                         pos++;
 699                                 } else {
 700                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 701                                         char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 702                                         for (int b=1; b<widthCharBytes; b++)
 703                                                 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
 704                                         int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 705                                         if (utf8status & UTF8MaskInvalid)
 706                                                 pos++;
 707                                         else
 708                                                 pos += utf8status & UTF8MaskWidth;
 709                                 }
 710                         } else {
 711                                 // Examine byte before position
 712                                 pos--;
 713                                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 714                                 // If ch is not a trail byte then pos is valid intercharacter position
 715                                 if (UTF8IsTrailByte(ch)) {
 716                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 717                                         int startUTF = pos;
 718                                         int endUTF = pos;
 719                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 720                                                 pos = startUTF;
 721                                         }
 722                                         // Else invalid UTF-8 so return position of isolated trail byte
 723                                 }
 724                         }
 725                 } else {
 726                         if (moveDir > 0) {
 727                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 728                                 pos += mbsize;
 729                                 if (pos > Length())
 730                                         pos = Length();
 731                         } else {
 732                                 // Anchor DBCS calculations at start of line because start of line can
 733                                 // not be a DBCS trail byte.
 734                                 int posStartLine = LineStart(LineFromPosition(pos));
 735                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 736                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 737                                 if ((pos - 1) <= posStartLine) {
 738                                         return pos - 1;
 739                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 740                                         // Must actually be trail byte
 741                                         return pos - 2;
 742                                 } else {
 743                                         // Otherwise, step back until a non-lead-byte is found.
 744                                         int posTemp = pos - 1;
 745                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 746                                                 ;
 747                                         // Now posTemp+1 must point to the beginning of a character,
 748                                         // so figure out whether we went back an even or an odd
 749                                         // number of bytes and go back 1 or 2 bytes, respectively.
 750                                         return (pos - 1 - ((pos - posTemp) & 1));
 751                                 }
 752                         }
 753                 }
 754         } else {
 755                 pos += increment;
 756         }
 757
 758         return pos;
 759 }
 760
 761 bool Document::NextCharacter(int &pos, int moveDir) const {
 762         // Returns true if pos changed
 763         int posNext = NextPosition(pos, moveDir);
 764         if (posNext == pos) {
 765                 return false;
 766         } else {
 767                 pos = posNext;
 768                 return true;
 769         }
 770 }
 771
 772 // Return -1  on out-of-bounds
 773 int SCI_METHOD Document::GetRelativePosition(int positionStart, int characterOffset) const {
 774         int pos = positionStart;
 775         if (dbcsCodePage) {
 776                 const int increment = (characterOffset > 0) ? 1 : -1;
 777                 while (characterOffset != 0) {
 778                         const int posNext = NextPosition(pos, increment);
 779                         if (posNext == pos)
 780                                 return INVALID_POSITION;
 781                         pos = posNext;
 782                         characterOffset -= increment;
 783                 }
 784         } else {
 785                 pos = positionStart + characterOffset;
 786                 if ((pos < 0) || (pos > Length()))
 787                         return INVALID_POSITION;
 788         }
 789         return pos;
 790 }
 791
 792 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
 793         int pos = positionStart;
 794         if (dbcsCodePage) {
 795                 const int increment = (characterOffset > 0) ? 1 : -1;
 796                 while (characterOffset != 0) {
 797                         const int posNext = NextPosition(pos, increment);
 798                         if (posNext == pos)
 799                                 return INVALID_POSITION;
 800                         if (abs(pos-posNext) > 3)       // 4 byte character = 2*UTF16.
 801                                 characterOffset -= increment;
 802                         pos = posNext;
 803                         characterOffset -= increment;
 804                 }
 805         } else {
 806                 pos = positionStart + characterOffset;
 807                 if ((pos < 0) || (pos > Length()))
 808                         return INVALID_POSITION;
 809         }
 810         return pos;
 811 }
 812
 813 int SCI_METHOD Document::GetCharacterAndWidth(int position, int *pWidth) const {
 814         int character;
 815         int bytesInCharacter = 1;
 816         if (dbcsCodePage) {
 817                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 818                 if (SC_CP_UTF8 == dbcsCodePage) {
 819                         if (UTF8IsAscii(leadByte)) {
 820                                 // Single byte character or invalid
 821                                 character =  leadByte;
 822                         } else {
 823                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 824                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 825                                 for (int b=1; b<widthCharBytes; b++)
 826                                         charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 827                                 int utf8status = UTF8Classify(charBytes, widthCharBytes);
 828                                 if (utf8status & UTF8MaskInvalid) {
 829                                         // Report as singleton surrogate values which are invalid Unicode
 830                                         character =  0xDC80 + leadByte;
 831                                 } else {
 832                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 833                                         character = UnicodeFromUTF8(charBytes);
 834                                 }
 835                         }
 836                 } else {
 837                         if (IsDBCSLeadByte(leadByte)) {
 838                                 bytesInCharacter = 2;
 839                                 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 840                         } else {
 841                                 character = leadByte;
 842                         }
 843                 }
 844         } else {
 845                 character = cb.CharAt(position);
 846         }
 847         if (pWidth) {
 848                 *pWidth = bytesInCharacter;
 849         }
 850         return character;
 851 }
 852
 853 int SCI_METHOD Document::CodePage() const {
 854         return dbcsCodePage;
 855 }
 856
 857 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 858         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 859         unsigned char uch = static_cast<unsigned char>(ch);
 860         switch (dbcsCodePage) {
 861                 case 932:
 862                         // Shift_jis
 863                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 864                                 ((uch >= 0xE0) && (uch <= 0xFC));
 865                                 // Lead bytes F0 to FC may be a Microsoft addition.
 866                 case 936:
 867                         // GBK
 868                         return (uch >= 0x81) && (uch <= 0xFE);
 869                 case 949:
 870                         // Korean Wansung KS C-5601-1987
 871                         return (uch >= 0x81) && (uch <= 0xFE);
 872                 case 950:
 873                         // Big5
 874                         return (uch >= 0x81) && (uch <= 0xFE);
 875                 case 1361:
 876                         // Korean Johab KS C-5601-1992
 877                         return
 878                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 879                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 880                                 ((uch >= 0xE0) && (uch <= 0xF9));
 881         }
 882         return false;
 883 }
 884
 885 static inline bool IsSpaceOrTab(int ch) {
 886         return ch == ' ' || ch == '\t';
 887 }
 888
 889 // Need to break text into segments near lengthSegment but taking into
 890 // account the encoding to not break inside a UTF-8 or DBCS character
 891 // and also trying to avoid breaking inside a pair of combining characters.
 892 // The segment length must always be long enough (more than 4 bytes)
 893 // so that there will be at least one whole character to make a segment.
 894 // For UTF-8, text must consist only of valid whole characters.
 895 // In preference order from best to worst:
 896 //   1) Break after space
 897 //   2) Break before punctuation
 898 //   3) Break after whole character
 899
 900 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
 901         if (length <= lengthSegment)
 902                 return length;
 903         int lastSpaceBreak = -1;
 904         int lastPunctuationBreak = -1;
 905         int lastEncodingAllowedBreak = 0;
 906         for (int j=0; j < lengthSegment;) {
 907                 unsigned char ch = static_cast<unsigned char>(text[j]);
 908                 if (j > 0) {
 909                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 910                                 lastSpaceBreak = j;
 911                         }
 912                         if (ch < 'A') {
 913                                 lastPunctuationBreak = j;
 914                         }
 915                 }
 916                 lastEncodingAllowedBreak = j;
 917
 918                 if (dbcsCodePage == SC_CP_UTF8) {
 919                         j += UTF8BytesOfLead[ch];
 920                 } else if (dbcsCodePage) {
 921                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 922                 } else {
 923                         j++;
 924                 }
 925         }
 926         if (lastSpaceBreak >= 0) {
 927                 return lastSpaceBreak;
 928         } else if (lastPunctuationBreak >= 0) {
 929                 return lastPunctuationBreak;
 930         }
 931         return lastEncodingAllowedBreak;
 932 }
 933
 934 EncodingFamily Document::CodePageFamily() const {
 935         if (SC_CP_UTF8 == dbcsCodePage)
 936                 return efUnicode;
 937         else if (dbcsCodePage)
 938                 return efDBCS;
 939         else
 940                 return efEightBit;
 941 }
 942
 943 void Document::ModifiedAt(int pos) {
 944         if (endStyled > pos)
 945                 endStyled = pos;
 946 }
 947
 948 void Document::CheckReadOnly() {
 949         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 950                 enteredReadOnlyCount++;
 951                 NotifyModifyAttempt();
 952                 enteredReadOnlyCount--;
 953         }
 954 }
 955
 956 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 957 // SetStyleAt does not change the persistent state of a document
 958
 959 bool Document::DeleteChars(int pos, int len) {
 960         if (pos < 0)
 961                 return false;
 962         if (len <= 0)
 963                 return false;
 964         if ((pos + len) > Length())
 965                 return false;
 966         CheckReadOnly();
 967         if (enteredModification != 0) {
 968                 return false;
 969         } else {
 970                 enteredModification++;
 971                 if (!cb.IsReadOnly()) {
 972                         NotifyModified(
 973                             DocModification(
 974                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 975                                 pos, len,
 976                                 0, 0));
 977                         int prevLinesTotal = LinesTotal();
 978                         bool startSavePoint = cb.IsSavePoint();
 979                         bool startSequence = false;
 980                         const char *text = cb.DeleteChars(pos, len, startSequence);
 981                         if (startSavePoint && cb.IsCollectingUndo())
 982                                 NotifySavePoint(!startSavePoint);
 983                         if ((pos < Length()) || (pos == 0))
 984                                 ModifiedAt(pos);
 985                         else
 986                                 ModifiedAt(pos-1);
 987                         NotifyModified(
 988                             DocModification(
 989                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 990                                 pos, len,
 991                                 LinesTotal() - prevLinesTotal, text));
 992                 }
 993                 enteredModification--;
 994         }
 995         return !cb.IsReadOnly();
 996 }
 997
 998 /**
 999  * Insert a string with a length.
1000  */
1001 int Document::InsertString(int position, const char *s, int insertLength) {
1002         if (insertLength <= 0) {
1003                 return 0;
1004         }
1005         CheckReadOnly();        // Application may change read only state here
1006         if (cb.IsReadOnly()) {
1007                 return 0;
1008         }
1009         if (enteredModification != 0) {
1010                 return 0;
1011         }
1012         enteredModification++;
1013         insertionSet = false;
1014         insertion.clear();
1015         NotifyModified(
1016                 DocModification(
1017                         SC_MOD_INSERTCHECK,
1018                         position, insertLength,
1019                         0, s));
1020         if (insertionSet) {
1021                 s = insertion.c_str();
1022                 insertLength = static_cast<int>(insertion.length());
1023         }
1024         NotifyModified(
1025                 DocModification(
1026                         SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1027                         position, insertLength,
1028                         0, s));
1029         int prevLinesTotal = LinesTotal();
1030         bool startSavePoint = cb.IsSavePoint();
1031         bool startSequence = false;
1032         const char *text = cb.InsertString(position, s, insertLength, startSequence);
1033         if (startSavePoint && cb.IsCollectingUndo())
1034                 NotifySavePoint(!startSavePoint);
1035         ModifiedAt(position);
1036         NotifyModified(
1037                 DocModification(
1038                         SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1039                         position, insertLength,
1040                         LinesTotal() - prevLinesTotal, text));
1041         if (insertionSet) {     // Free memory as could be large
1042                 std::string().swap(insertion);
1043         }
1044         enteredModification--;
1045         return insertLength;
1046 }
1047
1048 void Document::ChangeInsertion(const char *s, int length) {
1049         insertionSet = true;
1050         insertion.assign(s, length);
1051 }
1052
1053 int SCI_METHOD Document::AddData(char *data, int length) {
1054         try {
1055                 int position = Length();
1056                 InsertString(position, data, length);
1057         } catch (std::bad_alloc &) {
1058                 return SC_STATUS_BADALLOC;
1059         } catch (...) {
1060                 return SC_STATUS_FAILURE;
1061         }
1062         return 0;
1063 }
1064
1065 void * SCI_METHOD Document::ConvertToDocument() {
1066         return this;
1067 }
1068
1069 int Document::Undo() {
1070         int newPos = -1;
1071         CheckReadOnly();
1072         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1073                 enteredModification++;
1074                 if (!cb.IsReadOnly()) {
1075                         bool startSavePoint = cb.IsSavePoint();
1076                         bool multiLine = false;
1077                         int steps = cb.StartUndo();
1078                         //Platform::DebugPrintf("Steps=%d\n", steps);
1079                         int coalescedRemovePos = -1;
1080                         int coalescedRemoveLen = 0;
1081                         int prevRemoveActionPos = -1;
1082                         int prevRemoveActionLen = 0;
1083                         for (int step = 0; step < steps; step++) {
1084                                 const int prevLinesTotal = LinesTotal();
1085                                 const Action &action = cb.GetUndoStep();
1086                                 if (action.at == removeAction) {
1087                                         NotifyModified(DocModification(
1088                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1089                                 } else if (action.at == containerAction) {
1090                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1091                                         dm.token = action.position;
1092                                         NotifyModified(dm);
1093                                         if (!action.mayCoalesce) {
1094                                                 coalescedRemovePos = -1;
1095                                                 coalescedRemoveLen = 0;
1096                                                 prevRemoveActionPos = -1;
1097                                                 prevRemoveActionLen = 0;
1098                                         }
1099                                 } else {
1100                                         NotifyModified(DocModification(
1101                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1102                                 }
1103                                 cb.PerformUndoStep();
1104                                 if (action.at != containerAction) {
1105                                         ModifiedAt(action.position);
1106                                         newPos = action.position;
1107                                 }
1108
1109                                 int modFlags = SC_PERFORMED_UNDO;
1110                                 // With undo, an insertion action becomes a deletion notification
1111                                 if (action.at == removeAction) {
1112                                         newPos += action.lenData;
1113                                         modFlags |= SC_MOD_INSERTTEXT;
1114                                         if ((coalescedRemoveLen > 0) &&
1115                                                 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1116                                                 coalescedRemoveLen += action.lenData;
1117                                                 newPos = coalescedRemovePos + coalescedRemoveLen;
1118                                         } else {
1119                                                 coalescedRemovePos = action.position;
1120                                                 coalescedRemoveLen = action.lenData;
1121                                         }
1122                                         prevRemoveActionPos = action.position;
1123                                         prevRemoveActionLen = action.lenData;
1124                                 } else if (action.at == insertAction) {
1125                                         modFlags |= SC_MOD_DELETETEXT;
1126                                         coalescedRemovePos = -1;
1127                                         coalescedRemoveLen = 0;
1128                                         prevRemoveActionPos = -1;
1129                                         prevRemoveActionLen = 0;
1130                                 }
1131                                 if (steps > 1)
1132                                         modFlags |= SC_MULTISTEPUNDOREDO;
1133                                 const int linesAdded = LinesTotal() - prevLinesTotal;
1134                                 if (linesAdded != 0)
1135                                         multiLine = true;
1136                                 if (step == steps - 1) {
1137                                         modFlags |= SC_LASTSTEPINUNDOREDO;
1138                                         if (multiLine)
1139                                                 modFlags |= SC_MULTILINEUNDOREDO;
1140                                 }
1141                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1142                                                                                            linesAdded, action.data));
1143                         }
1144
1145                         bool endSavePoint = cb.IsSavePoint();
1146                         if (startSavePoint != endSavePoint)
1147                                 NotifySavePoint(endSavePoint);
1148                 }
1149                 enteredModification--;
1150         }
1151         return newPos;
1152 }
1153
1154 int Document::Redo() {
1155         int newPos = -1;
1156         CheckReadOnly();
1157         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1158                 enteredModification++;
1159                 if (!cb.IsReadOnly()) {
1160                         bool startSavePoint = cb.IsSavePoint();
1161                         bool multiLine = false;
1162                         int steps = cb.StartRedo();
1163                         for (int step = 0; step < steps; step++) {
1164                                 const int prevLinesTotal = LinesTotal();
1165                                 const Action &action = cb.GetRedoStep();
1166                                 if (action.at == insertAction) {
1167                                         NotifyModified(DocModification(
1168                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1169                                 } else if (action.at == containerAction) {
1170                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1171                                         dm.token = action.position;
1172                                         NotifyModified(dm);
1173                                 } else {
1174                                         NotifyModified(DocModification(
1175                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1176                                 }
1177                                 cb.PerformRedoStep();
1178                                 if (action.at != containerAction) {
1179                                         ModifiedAt(action.position);
1180                                         newPos = action.position;
1181                                 }
1182
1183                                 int modFlags = SC_PERFORMED_REDO;
1184                                 if (action.at == insertAction) {
1185                                         newPos += action.lenData;
1186                                         modFlags |= SC_MOD_INSERTTEXT;
1187                                 } else if (action.at == removeAction) {
1188                                         modFlags |= SC_MOD_DELETETEXT;
1189                                 }
1190                                 if (steps > 1)
1191                                         modFlags |= SC_MULTISTEPUNDOREDO;
1192                                 const int linesAdded = LinesTotal() - prevLinesTotal;
1193                                 if (linesAdded != 0)
1194                                         multiLine = true;
1195                                 if (step == steps - 1) {
1196                                         modFlags |= SC_LASTSTEPINUNDOREDO;
1197                                         if (multiLine)
1198                                                 modFlags |= SC_MULTILINEUNDOREDO;
1199                                 }
1200                                 NotifyModified(
1201                                         DocModification(modFlags, action.position, action.lenData,
1202                                                                         linesAdded, action.data));
1203                         }
1204
1205                         bool endSavePoint = cb.IsSavePoint();
1206                         if (startSavePoint != endSavePoint)
1207                                 NotifySavePoint(endSavePoint);
1208                 }
1209                 enteredModification--;
1210         }
1211         return newPos;
1212 }
1213
1214 void Document::DelChar(int pos) {
1215         DeleteChars(pos, LenChar(pos));
1216 }
1217
1218 void Document::DelCharBack(int pos) {
1219         if (pos <= 0) {
1220                 return;
1221         } else if (IsCrLf(pos - 2)) {
1222                 DeleteChars(pos - 2, 2);
1223         } else if (dbcsCodePage) {
1224                 int startChar = NextPosition(pos, -1);
1225                 DeleteChars(startChar, pos - startChar);
1226         } else {
1227                 DeleteChars(pos - 1, 1);
1228         }
1229 }
1230
// Column of the first tab stop strictly after pos, with stops every tabSize columns.
static int NextTab(int pos, int tabSize) {
        const int pastLastStop = pos % tabSize;
        return pos - pastLastStop + tabSize;
}
1234
// Build the white space for an indentation of indent columns.
// Unless insertSpaces is set, as many tabs as fit (tabSize columns each) are used
// first; whatever is left over is made up with spaces.
// A non-positive indent gives an empty string.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
        std::string whitespace;
        if (!insertSpaces) {
                for (; indent >= tabSize; indent -= tabSize)
                        whitespace.push_back('\t');
        }
        if (indent > 0)
                whitespace.append(static_cast<std::string::size_type>(indent), ' ');
        return whitespace;
}
1249
1250 int SCI_METHOD Document::GetLineIndentation(int line) {
1251         int indent = 0;
1252         if ((line >= 0) && (line < LinesTotal())) {
1253                 int lineStart = LineStart(line);
1254                 int length = Length();
1255                 for (int i = lineStart; i < length; i++) {
1256                         char ch = cb.CharAt(i);
1257                         if (ch == ' ')
1258                                 indent++;
1259                         else if (ch == '\t')
1260                                 indent = NextTab(indent, tabInChars);
1261                         else
1262                                 return indent;
1263                 }
1264         }
1265         return indent;
1266 }
1267
1268 int Document::SetLineIndentation(int line, int indent) {
1269         int indentOfLine = GetLineIndentation(line);
1270         if (indent < 0)
1271                 indent = 0;
1272         if (indent != indentOfLine) {
1273                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1274                 int thisLineStart = LineStart(line);
1275                 int indentPos = GetLineIndentPosition(line);
1276                 UndoGroup ug(this);
1277                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1278                 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1279                         static_cast<int>(linebuf.length()));
1280         } else {
1281                 return GetLineIndentPosition(line);
1282         }
1283 }
1284
1285 int Document::GetLineIndentPosition(int line) const {
1286         if (line < 0)
1287                 return 0;
1288         int pos = LineStart(line);
1289         int length = Length();
1290         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1291                 pos++;
1292         }
1293         return pos;
1294 }
1295
1296 int Document::GetColumn(int pos) {
1297         int column = 0;
1298         int line = LineFromPosition(pos);
1299         if ((line >= 0) && (line < LinesTotal())) {
1300                 for (int i = LineStart(line); i < pos;) {
1301                         char ch = cb.CharAt(i);
1302                         if (ch == '\t') {
1303                                 column = NextTab(column, tabInChars);
1304                                 i++;
1305                         } else if (ch == '\r') {
1306                                 return column;
1307                         } else if (ch == '\n') {
1308                                 return column;
1309                         } else if (i >= Length()) {
1310                                 return column;
1311                         } else {
1312                                 column++;
1313                                 i = NextPosition(i, 1);
1314                         }
1315                 }
1316         }
1317         return column;
1318 }
1319
1320 int Document::CountCharacters(int startPos, int endPos) const {
1321         startPos = MovePositionOutsideChar(startPos, 1, false);
1322         endPos = MovePositionOutsideChar(endPos, -1, false);
1323         int count = 0;
1324         int i = startPos;
1325         while (i < endPos) {
1326                 count++;
1327                 if (IsCrLf(i))
1328                         i++;
1329                 i = NextPosition(i, 1);
1330         }
1331         return count;
1332 }
1333
1334 int Document::CountUTF16(int startPos, int endPos) const {
1335         startPos = MovePositionOutsideChar(startPos, 1, false);
1336         endPos = MovePositionOutsideChar(endPos, -1, false);
1337         int count = 0;
1338         int i = startPos;
1339         while (i < endPos) {
1340                 count++;
1341                 const int next = NextPosition(i, 1);
1342                 if ((next - i) > 3)
1343                         count++;
1344                 i = next;
1345         }
1346         return count;
1347 }
1348
1349 int Document::FindColumn(int line, int column) {
1350         int position = LineStart(line);
1351         if ((line >= 0) && (line < LinesTotal())) {
1352                 int columnCurrent = 0;
1353                 while ((columnCurrent < column) && (position < Length())) {
1354                         char ch = cb.CharAt(position);
1355                         if (ch == '\t') {
1356                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1357                                 if (columnCurrent > column)
1358                                         return position;
1359                                 position++;
1360                         } else if (ch == '\r') {
1361                                 return position;
1362                         } else if (ch == '\n') {
1363                                 return position;
1364                         } else {
1365                                 columnCurrent++;
1366                                 position = NextPosition(position, 1);
1367                         }
1368                 }
1369         }
1370         return position;
1371 }
1372
1373 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1374         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1375         for (int line = lineBottom; line >= lineTop; line--) {
1376                 int indentOfLine = GetLineIndentation(line);
1377                 if (forwards) {
1378                         if (LineStart(line) < LineEnd(line)) {
1379                                 SetLineIndentation(line, indentOfLine + IndentSize());
1380                         }
1381                 } else {
1382                         SetLineIndentation(line, indentOfLine - IndentSize());
1383                 }
1384         }
1385 }
1386
1387 // Convert line endings for a piece of text to a particular mode.
1388 // Stop at len or when a NUL is found.
1389 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1390         std::string dest;
1391         for (size_t i = 0; (i < len) && (s[i]); i++) {
1392                 if (s[i] == '\n' || s[i] == '\r') {
1393                         if (eolModeWanted == SC_EOL_CR) {
1394                                 dest.push_back('\r');
1395                         } else if (eolModeWanted == SC_EOL_LF) {
1396                                 dest.push_back('\n');
1397                         } else { // eolModeWanted == SC_EOL_CRLF
1398                                 dest.push_back('\r');
1399                                 dest.push_back('\n');
1400                         }
1401                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1402                                 i++;
1403                         }
1404                 } else {
1405                         dest.push_back(s[i]);
1406                 }
1407         }
1408         return dest;
1409 }
1410
1411 void Document::ConvertLineEnds(int eolModeSet) {
1412         UndoGroup ug(this);
1413
1414         for (int pos = 0; pos < Length(); pos++) {
1415                 if (cb.CharAt(pos) == '\r') {
1416                         if (cb.CharAt(pos + 1) == '\n') {
1417                                 // CRLF
1418                                 if (eolModeSet == SC_EOL_CR) {
1419                                         DeleteChars(pos + 1, 1); // Delete the LF
1420                                 } else if (eolModeSet == SC_EOL_LF) {
1421                                         DeleteChars(pos, 1); // Delete the CR
1422                                 } else {
1423                                         pos++;
1424                                 }
1425                         } else {
1426                                 // CR
1427                                 if (eolModeSet == SC_EOL_CRLF) {
1428                                         pos += InsertString(pos + 1, "\n", 1); // Insert LF
1429                                 } else if (eolModeSet == SC_EOL_LF) {
1430                                         pos += InsertString(pos, "\n", 1); // Insert LF
1431                                         DeleteChars(pos, 1); // Delete CR
1432                                         pos--;
1433                                 }
1434                         }
1435                 } else if (cb.CharAt(pos) == '\n') {
1436                         // LF
1437                         if (eolModeSet == SC_EOL_CRLF) {
1438                                 pos += InsertString(pos, "\r", 1); // Insert CR
1439                         } else if (eolModeSet == SC_EOL_CR) {
1440                                 pos += InsertString(pos, "\r", 1); // Insert CR
1441                                 DeleteChars(pos, 1); // Delete LF
1442                                 pos--;
1443                         }
1444                 }
1445         }
1446
1447 }
1448
1449 bool Document::IsWhiteLine(int line) const {
1450         int currentChar = LineStart(line);
1451         int endLine = LineEnd(line);
1452         while (currentChar < endLine) {
1453                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1454                         return false;
1455                 }
1456                 ++currentChar;
1457         }
1458         return true;
1459 }
1460
1461 int Document::ParaUp(int pos) const {
1462         int line = LineFromPosition(pos);
1463         line--;
1464         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1465                 line--;
1466         }
1467         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1468                 line--;
1469         }
1470         line++;
1471         return LineStart(line);
1472 }
1473
1474 int Document::ParaDown(int pos) const {
1475         int line = LineFromPosition(pos);
1476         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1477                 line++;
1478         }
1479         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1480                 line++;
1481         }
1482         if (line < LinesTotal())
1483                 return LineStart(line);
1484         else // end of a document
1485                 return LineEnd(line-1);
1486 }
1487
1488 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1489         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1490                 return CharClassify::ccWord;
1491         return charClass.GetClass(ch);
1492 }
1493
1494 /**
1495  * Used by commmands that want to select whole words.
1496  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1497  */
1498 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1499         CharClassify::cc ccStart = CharClassify::ccWord;
1500         if (delta < 0) {
1501                 if (!onlyWordCharacters)
1502                         ccStart = WordCharClass(cb.CharAt(pos-1));
1503                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1504                         pos--;
1505         } else {
1506                 if (!onlyWordCharacters && pos < Length())
1507                         ccStart = WordCharClass(cb.CharAt(pos));
1508                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1509                         pos++;
1510         }
1511         return MovePositionOutsideChar(pos, delta, true);
1512 }
1513
1514 /**
1515  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1516  * (delta < 0).
1517  * This is looking for a transition between character classes although there is also some
1518  * additional movement to transit white space.
1519  * Used by cursor movement by word commands.
1520  */
1521 int Document::NextWordStart(int pos, int delta) {
1522         if (delta < 0) {
1523                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1524                         pos--;
1525                 if (pos > 0) {
1526                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1527                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1528                                 pos--;
1529                         }
1530                 }
1531         } else {
1532                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1533                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1534                         pos++;
1535                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1536                         pos++;
1537         }
1538         return pos;
1539 }
1540
1541 /**
1542  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1543  * (delta < 0).
1544  * This is looking for a transition between character classes although there is also some
1545  * additional movement to transit white space.
1546  * Used by cursor movement by word commands.
1547  */
1548 int Document::NextWordEnd(int pos, int delta) {
1549         if (delta < 0) {
1550                 if (pos > 0) {
1551                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1552                         if (ccStart != CharClassify::ccSpace) {
1553                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1554                                         pos--;
1555                                 }
1556                         }
1557                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1558                                 pos--;
1559                         }
1560                 }
1561         } else {
1562                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1563                         pos++;
1564                 }
1565                 if (pos < Length()) {
1566                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1567                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1568                                 pos++;
1569                         }
1570                 }
1571         }
1572         return pos;
1573 }
1574
1575 /**
1576  * Check that the character at the given position is a word or punctuation character and that
1577  * the previous character is of a different character class.
1578  */
1579 bool Document::IsWordStartAt(int pos) const {
1580         if (pos > 0) {
1581                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1582                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1583                         (ccPos != WordCharClass(CharAt(pos - 1)));
1584         }
1585         return true;
1586 }
1587
1588 /**
1589  * Check that the character at the given position is a word or punctuation character and that
1590  * the next character is of a different character class.
1591  */
1592 bool Document::IsWordEndAt(int pos) const {
1593         if (pos < Length()) {
1594                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1595                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1596                         (ccPrev != WordCharClass(CharAt(pos)));
1597         }
1598         return true;
1599 }
1600
1601 /**
1602  * Check that the given range is has transitions between character classes at both
1603  * ends and where the characters on the inside are word or punctuation characters.
1604  */
1605 bool Document::IsWordAt(int start, int end) const {
1606         return IsWordStartAt(start) && IsWordEndAt(end);
1607 }
1608
1609 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1610         return (!word && !wordStart) ||
1611                         (word && IsWordAt(pos, pos + length)) ||
1612                         (wordStart && IsWordStartAt(pos));
1613 }
1614
1615 bool Document::HasCaseFolder(void) const {
1616         return pcf != 0;
1617 }
1618
1619 void Document::SetCaseFolder(CaseFolder *pcf_) {
1620         delete pcf;
1621         pcf = pcf_;
1622 }
1623
1624 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1625         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1626         if (UTF8IsAscii(leadByte)) {
1627                 // Common case: ASCII character
1628                 return CharacterExtracted(leadByte, 1);
1629         }
1630         const int widthCharBytes = UTF8BytesOfLead[leadByte];
1631         unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1632         for (int b=1; b<widthCharBytes; b++)
1633                 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1634         int utf8status = UTF8Classify(charBytes, widthCharBytes);
1635         if (utf8status & UTF8MaskInvalid) {
1636                 // Treat as invalid and use up just one byte
1637                 return CharacterExtracted(unicodeReplacementChar, 1);
1638         } else {
1639                 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1640         }
1641 }
1642
1643 /**
1644  * Find text in document, supporting both forward and backward
1645  * searches (just pass minPos > maxPos to do a backward search)
1646  * Has not been tested with backwards DBCS searches yet.
1647  */
1648 long Document::FindText(int minPos, int maxPos, const char *search,
1649                         bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1650                         int *length) {
1651         if (*length <= 0)
1652                 return minPos;
1653         if (regExp) {
1654                 if (!regex)
1655                         regex = CreateRegexSearch(&charClass);
1656                 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1657         } else {
1658
1659                 const bool forward = minPos <= maxPos;
1660                 const int increment = forward ? 1 : -1;
1661
1662                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1663                 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1664                 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1665
1666                 // Compute actual search ranges needed
1667                 const int lengthFind = *length;
1668
1669                 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1670                 const int limitPos = Platform::Maximum(startPos, endPos);
1671                 int pos = startPos;
1672                 if (!forward) {
1673                         // Back all of a character
1674                         pos = NextPosition(pos, increment);
1675                 }
1676                 if (caseSensitive) {
1677                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1678                         const char charStartSearch =  search[0];
1679                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1680                                 if (CharAt(pos) == charStartSearch) {
1681                                         bool found = (pos + lengthFind) <= limitPos;
1682                                         for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1683                                                 found = CharAt(pos + indexSearch) == search[indexSearch];
1684                                         }
1685                                         if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1686                                                 return pos;
1687                                         }
1688                                 }
1689                                 if (!NextCharacter(pos, increment))
1690                                         break;
1691                         }
1692                 } else if (SC_CP_UTF8 == dbcsCodePage) {
1693                         const size_t maxFoldingExpansion = 4;
1694                         std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1695                         const int lenSearch = static_cast<int>(
1696                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1697                         char bytes[UTF8MaxBytes + 1];
1698                         char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1699                         while (forward ? (pos < endPos) : (pos >= endPos)) {
1700                                 int widthFirstCharacter = 0;
1701                                 int posIndexDocument = pos;
1702                                 int indexSearch = 0;
1703                                 bool characterMatches = true;
1704                                 for (;;) {
1705                                         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1706                                         bytes[0] = leadByte;
1707                                         int widthChar = 1;
1708                                         if (!UTF8IsAscii(leadByte)) {
1709                                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1710                                                 for (int b=1; b<widthCharBytes; b++) {
1711                                                         bytes[b] = cb.CharAt(posIndexDocument+b);
1712                                                 }
1713                                                 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1714                                         }
1715                                         if (!widthFirstCharacter)
1716                                                 widthFirstCharacter = widthChar;
1717                                         if ((posIndexDocument + widthChar) > limitPos)
1718                                                 break;
1719                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1720                                         folded[lenFlat] = 0;
1721                                         // Does folded match the buffer
1722                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1723                                         if (!characterMatches)
1724                                                 break;
1725                                         posIndexDocument += widthChar;
1726                                         indexSearch += lenFlat;
1727                                         if (indexSearch >= lenSearch)
1728                                                 break;
1729                                 }
1730                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1731                                         if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
1732                                                 *length = posIndexDocument - pos;
1733                                                 return pos;
1734                                         }
1735                                 }
1736                                 if (forward) {
1737                                         pos += widthFirstCharacter;
1738                                 } else {
1739                                         if (!NextCharacter(pos, increment))
1740                                                 break;
1741                                 }
1742                         }
1743                 } else if (dbcsCodePage) {
1744                         const size_t maxBytesCharacter = 2;
1745                         const size_t maxFoldingExpansion = 4;
1746                         std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1747                         const int lenSearch = static_cast<int>(
1748                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1749                         while (forward ? (pos < endPos) : (pos >= endPos)) {
1750                                 int indexDocument = 0;
1751                                 int indexSearch = 0;
1752                                 bool characterMatches = true;
1753                                 while (characterMatches &&
1754                                         ((pos + indexDocument) < limitPos) &&
1755                                         (indexSearch < lenSearch)) {
1756                                         char bytes[maxBytesCharacter + 1];
1757                                         bytes[0] = cb.CharAt(pos + indexDocument);
1758                                         const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1759                                         if (widthChar == 2)
1760                                                 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1761                                         if ((pos + indexDocument + widthChar) > limitPos)
1762                                                 break;
1763                                         char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1764                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1765                                         folded[lenFlat] = 0;
1766                                         // Does folded match the buffer
1767                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1768                                         indexDocument += widthChar;
1769                                         indexSearch += lenFlat;
1770                                 }
1771                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1772                                         if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1773                                                 *length = indexDocument;
1774                                                 return pos;
1775                                         }
1776                                 }
1777                                 if (!NextCharacter(pos, increment))
1778                                         break;
1779                         }
1780                 } else {
1781                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1782                         std::vector<char> searchThing(lengthFind + 1);
1783                         pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1784                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1785                                 bool found = (pos + lengthFind) <= limitPos;
1786                                 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1787                                         char ch = CharAt(pos + indexSearch);
1788                                         char folded[2];
1789                                         pcf->Fold(folded, sizeof(folded), &ch, 1);
1790                                         found = folded[0] == searchThing[indexSearch];
1791                                 }
1792                                 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1793                                         return pos;
1794                                 }
1795                                 if (!NextCharacter(pos, increment))
1796                                         break;
1797                         }
1798                 }
1799         }
1800         //Platform::DebugPrintf("Not found\n");
1801         return -1;
1802 }
1803
1804 const char *Document::SubstituteByPosition(const char *text, int *length) {
1805         if (regex)
1806                 return regex->SubstituteByPosition(this, text, length);
1807         else
1808                 return 0;
1809 }
1810
1811 int Document::LinesTotal() const {
1812         return cb.Lines();
1813 }
1814
1815 void Document::SetDefaultCharClasses(bool includeWordClass) {
1816     charClass.SetDefaultCharClasses(includeWordClass);
1817 }
1818
1819 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1820     charClass.SetCharClasses(chars, newCharClass);
1821 }
1822
1823 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1824     return charClass.GetCharsOfClass(characterClass, buffer);
1825 }
1826
1827 void SCI_METHOD Document::StartStyling(int position, char) {
1828         endStyled = position;
1829 }
1830
1831 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1832         if (enteredStyling != 0) {
1833                 return false;
1834         } else {
1835                 enteredStyling++;
1836                 int prevEndStyled = endStyled;
1837                 if (cb.SetStyleFor(endStyled, length, style)) {
1838                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1839                                            prevEndStyled, length);
1840                         NotifyModified(mh);
1841                 }
1842                 endStyled += length;
1843                 enteredStyling--;
1844                 return true;
1845         }
1846 }
1847
1848 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1849         if (enteredStyling != 0) {
1850                 return false;
1851         } else {
1852                 enteredStyling++;
1853                 bool didChange = false;
1854                 int startMod = 0;
1855                 int endMod = 0;
1856                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1857                         PLATFORM_ASSERT(endStyled < Length());
1858                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
1859                                 if (!didChange) {
1860                                         startMod = endStyled;
1861                                 }
1862                                 didChange = true;
1863                                 endMod = endStyled;
1864                         }
1865                 }
1866                 if (didChange) {
1867                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1868                                            startMod, endMod - startMod + 1);
1869                         NotifyModified(mh);
1870                 }
1871                 enteredStyling--;
1872                 return true;
1873         }
1874 }
1875
1876 void Document::EnsureStyledTo(int pos) {
1877         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1878                 IncrementStyleClock();
1879                 if (pli && !pli->UseContainerLexing()) {
1880                         int lineEndStyled = LineFromPosition(GetEndStyled());
1881                         int endStyledTo = LineStart(lineEndStyled);
1882                         pli->Colourise(endStyledTo, pos);
1883                 } else {
1884                         // Ask the watchers to style, and stop as soon as one responds.
1885                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1886                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1887                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1888                         }
1889                 }
1890         }
1891 }
1892
1893 void Document::LexerChanged() {
1894         // Tell the watchers the lexer has changed.
1895         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1896                 it->watcher->NotifyLexerChanged(this, it->userData);
1897         }
1898 }
1899
1900 int SCI_METHOD Document::SetLineState(int line, int state) {
1901         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1902         if (state != statePrevious) {
1903                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1904                 NotifyModified(mh);
1905         }
1906         return statePrevious;
1907 }
1908
1909 int SCI_METHOD Document::GetLineState(int line) const {
1910         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1911 }
1912
1913 int Document::GetMaxLineState() {
1914         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1915 }
1916
1917 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1918         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1919         NotifyModified(mh);
1920 }
1921
1922 StyledText Document::MarginStyledText(int line) const {
1923         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1924         return StyledText(pla->Length(line), pla->Text(line),
1925                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1926 }
1927
1928 void Document::MarginSetText(int line, const char *text) {
1929         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1930         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1931         NotifyModified(mh);
1932 }
1933
1934 void Document::MarginSetStyle(int line, int style) {
1935         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1936         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1937 }
1938
1939 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1940         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1941         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1942 }
1943
1944 void Document::MarginClearAll() {
1945         int maxEditorLine = LinesTotal();
1946         for (int l=0; l<maxEditorLine; l++)
1947                 MarginSetText(l, 0);
1948         // Free remaining data
1949         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1950 }
1951
1952 StyledText Document::AnnotationStyledText(int line) const {
1953         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1954         return StyledText(pla->Length(line), pla->Text(line),
1955                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1956 }
1957
1958 void Document::AnnotationSetText(int line, const char *text) {
1959         if (line >= 0 && line < LinesTotal()) {
1960                 const int linesBefore = AnnotationLines(line);
1961                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1962                 const int linesAfter = AnnotationLines(line);
1963                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1964                 mh.annotationLinesAdded = linesAfter - linesBefore;
1965                 NotifyModified(mh);
1966         }
1967 }
1968
1969 void Document::AnnotationSetStyle(int line, int style) {
1970         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1971         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1972         NotifyModified(mh);
1973 }
1974
1975 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1976         if (line >= 0 && line < LinesTotal()) {
1977                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1978         }
1979 }
1980
1981 int Document::AnnotationLines(int line) const {
1982         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1983 }
1984
1985 void Document::AnnotationClearAll() {
1986         int maxEditorLine = LinesTotal();
1987         for (int l=0; l<maxEditorLine; l++)
1988                 AnnotationSetText(l, 0);
1989         // Free remaining data
1990         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1991 }
1992
1993 void Document::IncrementStyleClock() {
1994         styleClock = (styleClock + 1) % 0x100000;
1995 }
1996
1997 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1998         if (decorations.FillRange(position, value, fillLength)) {
1999                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2000                                                         position, fillLength);
2001                 NotifyModified(mh);
2002         }
2003 }
2004
2005 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2006         WatcherWithUserData wwud(watcher, userData);
2007         std::vector<WatcherWithUserData>::iterator it =
2008                 std::find(watchers.begin(), watchers.end(), wwud);
2009         if (it != watchers.end())
2010                 return false;
2011         watchers.push_back(wwud);
2012         return true;
2013 }
2014
2015 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2016         std::vector<WatcherWithUserData>::iterator it =
2017                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2018         if (it != watchers.end()) {
2019                 watchers.erase(it);
2020                 return true;
2021         }
2022         return false;
2023 }
2024
2025 void Document::NotifyModifyAttempt() {
2026         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2027                 it->watcher->NotifyModifyAttempt(this, it->userData);
2028         }
2029 }
2030
2031 void Document::NotifySavePoint(bool atSavePoint) {
2032         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2033                 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2034         }
2035 }
2036
2037 void Document::NotifyModified(DocModification mh) {
2038         if (mh.modificationType & SC_MOD_INSERTTEXT) {
2039                 decorations.InsertSpace(mh.position, mh.length);
2040         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2041                 decorations.DeleteRange(mh.position, mh.length);
2042         }
2043         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2044                 it->watcher->NotifyModified(this, mh, it->userData);
2045         }
2046 }
2047
2048 bool Document::IsWordPartSeparator(char ch) const {
2049         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2050 }
2051
/**
 * Find the start of the word part to the left of pos.
 * Steps back one position, skips a run of word part separators, then skips a
 * run of characters of the same kind (lower case, upper case, digit,
 * punctuation, space or non-ASCII) as the character reached.
 * @return the new position; pos is returned unchanged when it is not greater than 0.
 */
int Document::WordPartLeft(int pos) {
        if (pos > 0) {
                --pos;
                char startChar = cb.CharAt(pos);
                // Skip backwards over separators immediately before the starting position
                if (IsWordPartSeparator(startChar)) {
                        while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
                                --pos;
                        }
                }
                if (pos > 0) {
                        // startChar decides which kind of run is skipped; each branch walks
                        // back while the kind matches and then steps forward again if it
                        // stopped on a character that is not part of the run.
                        startChar = cb.CharAt(pos);
                        --pos;
                        if (IsLowerCase(startChar)) {
                                while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
                                        --pos;
                                // An upper case letter directly before the lower case run is
                                // kept as the first character of the part.
                                if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsUpperCase(startChar)) {
                                while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
                                        --pos;
                                if (!IsUpperCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsADigit(startChar)) {
                                while (pos > 0 && IsADigit(cb.CharAt(pos)))
                                        --pos;
                                if (!IsADigit(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsPunctuation(startChar)) {
                                while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
                                        --pos;
                                if (!IsPunctuation(cb.CharAt(pos)))
                                        ++pos;
                        } else if (isspacechar(startChar)) {
                                while (pos > 0 && isspacechar(cb.CharAt(pos)))
                                        --pos;
                                if (!isspacechar(cb.CharAt(pos)))
                                        ++pos;
                        } else if (!IsASCII(startChar)) {
                                while (pos > 0 && !IsASCII(cb.CharAt(pos)))
                                        --pos;
                                if (IsASCII(cb.CharAt(pos)))
                                        ++pos;
                        } else {
                                // No recognised kind: undo the step back taken above
                                ++pos;
                        }
                }
        }
        return pos;
}
2101
/**
 * Find the end of the word part that starts at pos, moving right.
 * Skips a leading run of word part separators, then skips a run of characters
 * of the same kind (non-ASCII, lower case, upper case, digit, punctuation or
 * space) as the character reached.
 * NOTE(review): cb.CharAt is called with pos + 1 and, after the loops, with pos
 * possibly equal to the document length; presumably CellBuffer::CharAt
 * tolerates positions outside the document - confirm.
 * @return the new position.
 */
int Document::WordPartRight(int pos) {
        char startChar = cb.CharAt(pos);
        int length = Length();
        // Skip forwards over separators at the starting position
        if (IsWordPartSeparator(startChar)) {
                while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
                        ++pos;
                startChar = cb.CharAt(pos);
        }
        if (!IsASCII(startChar)) {
                while (pos < length && !IsASCII(cb.CharAt(pos)))
                        ++pos;
        } else if (IsLowerCase(startChar)) {
                while (pos < length && IsLowerCase(cb.CharAt(pos)))
                        ++pos;
        } else if (IsUpperCase(startChar)) {
                if (IsLowerCase(cb.CharAt(pos + 1))) {
                        // One upper case letter followed by lower case letters: take
                        // the capital and the lower case run after it.
                        ++pos;
                        while (pos < length && IsLowerCase(cb.CharAt(pos)))
                                ++pos;
                } else {
                        // Otherwise take the whole run of upper case letters
                        while (pos < length && IsUpperCase(cb.CharAt(pos)))
                                ++pos;
                }
                // When the run stopped on a lower case letter that follows an upper
                // case letter, give that upper case letter back so it can start the
                // next part.
                if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
                        --pos;
        } else if (IsADigit(startChar)) {
                while (pos < length && IsADigit(cb.CharAt(pos)))
                        ++pos;
        } else if (IsPunctuation(startChar)) {
                while (pos < length && IsPunctuation(cb.CharAt(pos)))
                        ++pos;
        } else if (isspacechar(startChar)) {
                while (pos < length && isspacechar(cb.CharAt(pos)))
                        ++pos;
        } else {
                // No recognised kind: move past the single character
                ++pos;
        }
        return pos;
}
2141
/**
 * Report whether a character is a line feed or a carriage return.
 */
bool IsLineEndChar(char c) {
        switch (c) {
        case '\n':
        case '\r':
                return true;
        default:
                return false;
        }
}
2145
2146 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2147         int sStart = cb.StyleAt(pos);
2148         if (delta < 0) {
2149                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2150                         pos--;
2151                 pos++;
2152         } else {
2153                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2154                         pos++;
2155         }
2156         return pos;
2157 }
2158
/**
 * Map a brace character to its partner: () [] {} <>.
 * @return the matching brace, or '\0' when ch is not one of the eight braces.
 */
static char BraceOpposite(char ch) {
        // Partners sit next to each other: opener at even index, closer at odd index
        static const char bracePairs[] = "()[]{}<>";
        for (int i = 0; bracePairs[i] != '\0'; i += 2) {
                if (ch == bracePairs[i])
                        return bracePairs[i + 1];
                if (ch == bracePairs[i + 1])
                        return bracePairs[i];
        }
        return '\0';
}
2181
2182 // TODO: should be able to extend styled region to find matching brace
2183 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2184         char chBrace = CharAt(position);
2185         char chSeek = BraceOpposite(chBrace);
2186         if (chSeek == '\0')
2187                 return - 1;
2188         char styBrace = static_cast<char>(StyleAt(position));
2189         int direction = -1;
2190         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2191                 direction = 1;
2192         int depth = 1;
2193         position = NextPosition(position, direction);
2194         while ((position >= 0) && (position < Length())) {
2195                 char chAtPos = CharAt(position);
2196                 char styAtPos = static_cast<char>(StyleAt(position));
2197                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2198                         if (chAtPos == chBrace)
2199                                 depth++;
2200                         if (chAtPos == chSeek)
2201                                 depth--;
2202                         if (depth == 0)
2203                                 return position;
2204                 }
2205                 int positionBeforeMove = position;
2206                 position = NextPosition(position, direction);
2207                 if (position == positionBeforeMove)
2208                         break;
2209         }
2210         return - 1;
2211 }
2212
2213 /**
2214  * Implementation of RegexSearchBase for the default built-in regular expression engine
2215  */
2216 class BuiltinRegex : public RegexSearchBase {
2217 public:
2218         explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2219
2220         virtual ~BuiltinRegex() {
2221         }
2222
2223         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2224                         bool caseSensitive, bool word, bool wordStart, int flags,
2225                         int *length);
2226
2227         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2228
2229 private:
2230         RESearch search;
2231         std::string substituted;
2232 };
2233
2234 namespace {
2235
2236 /**
2237 * RESearchRange keeps track of search range.
2238 */
2239 class RESearchRange {
2240 public:
2241         const Document *doc;
2242         int increment;
2243         int startPos;
2244         int endPos;
2245         int lineRangeStart;
2246         int lineRangeEnd;
2247         int lineRangeBreak;
2248         RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2249                 increment = (minPos <= maxPos) ? 1 : -1;
2250
2251                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2252                 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2253                 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2254
2255                 lineRangeStart = doc->LineFromPosition(startPos);
2256                 lineRangeEnd = doc->LineFromPosition(endPos);
2257                 if ((increment == 1) &&
2258                         (startPos >= doc->LineEnd(lineRangeStart)) &&
2259                         (lineRangeStart < lineRangeEnd)) {
2260                         // the start position is at end of line or between line end characters.
2261                         lineRangeStart++;
2262                         startPos = doc->LineStart(lineRangeStart);
2263                 } else if ((increment == -1) &&
2264                         (startPos <= doc->LineStart(lineRangeStart)) &&
2265                         (lineRangeStart > lineRangeEnd)) {
2266                         // the start position is at beginning of line.
2267                         lineRangeStart--;
2268                         startPos = doc->LineEnd(lineRangeStart);
2269                 }
2270                 lineRangeBreak = lineRangeEnd + increment;
2271         }
2272         Range LineRange(int line) const {
2273                 Range range(doc->LineStart(line), doc->LineEnd(line));
2274                 if (increment == 1) {
2275                         if (line == lineRangeStart)
2276                                 range.start = startPos;
2277                         if (line == lineRangeEnd)
2278                                 range.end = endPos;
2279                 } else {
2280                         if (line == lineRangeEnd)
2281                                 range.start = endPos;
2282                         if (line == lineRangeStart)
2283                                 range.end = startPos;
2284                 }
2285                 return range;
2286         }
2287 };
2288
2289 // Define a way for the Regular Expression code to access the document
2290 class DocumentIndexer : public CharacterIndexer {
2291         Document *pdoc;
2292         int end;
2293 public:
2294         DocumentIndexer(Document *pdoc_, int end_) :
2295                 pdoc(pdoc_), end(end_) {
2296         }
2297
2298         virtual ~DocumentIndexer() {
2299         }
2300
2301         virtual char CharAt(int index) {
2302                 if (index < 0 || index >= end)
2303                         return 0;
2304                 else
2305                         return pdoc->CharAt(index);
2306         }
2307 };
2308
2309 #ifdef CXX11_REGEX
2310
2311 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2312 public:
2313         const Document *doc;
2314         Position position;
2315         ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2316         }
2317         ByteIterator(const ByteIterator &other) {
2318                 doc = other.doc;
2319                 position = other.position;
2320         }
2321         ByteIterator &operator=(const ByteIterator &other) {
2322                 if (this != &other) {
2323                         doc = other.doc;
2324                         position = other.position;
2325                 }
2326                 return *this;
2327         }
2328         char operator*() const {
2329                 return doc->CharAt(position);
2330         }
2331         ByteIterator &operator++() {
2332                 position++;
2333                 return *this;
2334         }
2335         ByteIterator operator++(int) {
2336                 ByteIterator retVal(*this);
2337                 position++;
2338                 return retVal;
2339         }
2340         ByteIterator &operator--() {
2341                 position--;
2342                 return *this;
2343         }
2344         bool operator==(const ByteIterator &other) const {
2345                 return doc == other.doc && position == other.position;
2346         }
2347         bool operator!=(const ByteIterator &other) const {
2348                 return doc != other.doc || position != other.position;
2349         }
2350         int Pos() const {
2351                 return position;
2352         }
2353         int PosRoundUp() const {
2354                 return position;
2355         }
2356 };
2357
2358 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2359 // Would be better to use sizeof(wchar_t) or similar to differentiate
2360 // but easier for now to hard-code platforms.
2361 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2362 // appear to allow specializing basic_regex over these.
2363
2364 #ifdef _WIN32
2365 #define WCHAR_T_IS_16 1
2366 #else
2367 #define WCHAR_T_IS_16 0
2368 #endif
2369
2370 #if WCHAR_T_IS_16
2371
2372 // On Windows, report non-BMP characters as 2 separate surrogates as that
2373 // matches wregex since it is based on wchar_t.
2374 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2375         // These 3 fields determine the iterator position and are used for comparisons
2376         const Document *doc;
2377         Position position;
2378         size_t characterIndex;
2379         // Remaining fields are derived from the determining fields so are excluded in comparisons
2380         unsigned int lenBytes;
2381         size_t lenCharacters;
2382         wchar_t buffered[2];
2383 public:
2384         UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2385                 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2386                 buffered[0] = 0;
2387                 buffered[1] = 0;
2388                 if (doc) {
2389                         ReadCharacter();
2390                 }
2391         }
2392         UTF8Iterator(const UTF8Iterator &other) {
2393                 doc = other.doc;
2394                 position = other.position;
2395                 characterIndex = other.characterIndex;
2396                 lenBytes = other.lenBytes;
2397                 lenCharacters = other.lenCharacters;
2398                 buffered[0] = other.buffered[0];
2399                 buffered[1] = other.buffered[1];
2400         }
2401         UTF8Iterator &operator=(const UTF8Iterator &other) {
2402                 if (this != &other) {
2403                         doc = other.doc;
2404                         position = other.position;
2405                         characterIndex = other.characterIndex;
2406                         lenBytes = other.lenBytes;
2407                         lenCharacters = other.lenCharacters;
2408                         buffered[0] = other.buffered[0];
2409                         buffered[1] = other.buffered[1];
2410                 }
2411                 return *this;
2412         }
2413         wchar_t operator*() const {
2414                 assert(lenCharacters != 0);
2415                 return buffered[characterIndex];
2416         }
2417         UTF8Iterator &operator++() {
2418                 if ((characterIndex + 1) < (lenCharacters)) {
2419                         characterIndex++;
2420                 } else {
2421                         position += lenBytes;
2422                         ReadCharacter();
2423                         characterIndex = 0;
2424                 }
2425                 return *this;
2426         }
2427         UTF8Iterator operator++(int) {
2428                 UTF8Iterator retVal(*this);
2429                 if ((characterIndex + 1) < (lenCharacters)) {
2430                         characterIndex++;
2431                 } else {
2432                         position += lenBytes;
2433                         ReadCharacter();
2434                         characterIndex = 0;
2435                 }
2436                 return retVal;
2437         }
2438         UTF8Iterator &operator--() {
2439                 if (characterIndex) {
2440                         characterIndex--;
2441                 } else {
2442                         position = doc->NextPosition(position, -1);
2443                         ReadCharacter();
2444                         characterIndex = lenCharacters - 1;
2445                 }
2446                 return *this;
2447         }
2448         bool operator==(const UTF8Iterator &other) const {
2449                 // Only test the determining fields, not the character widths and values derived from this
2450                 return doc == other.doc &&
2451                         position == other.position &&
2452                         characterIndex == other.characterIndex;
2453         }
2454         bool operator!=(const UTF8Iterator &other) const {
2455                 // Only test the determining fields, not the character widths and values derived from this
2456                 return doc != other.doc ||
2457                         position != other.position ||
2458                         characterIndex != other.characterIndex;
2459         }
2460         int Pos() const {
2461                 return position;
2462         }
2463         int PosRoundUp() const {
2464                 if (characterIndex)
2465                         return position + lenBytes;     // Force to end of character
2466                 else
2467                         return position;
2468         }
2469 private:
2470         void ReadCharacter() {
2471                 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2472                 lenBytes = charExtracted.widthBytes;
2473                 if (charExtracted.character == unicodeReplacementChar) {
2474                         lenCharacters = 1;
2475                         buffered[0] = static_cast<wchar_t>(charExtracted.character);
2476                 } else {
2477                         lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2478                 }
2479         }
2480 };
2481
2482 #else
2483
2484 // On Unix, report non-BMP characters as single characters
2485
2486 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2487         const Document *doc;
2488         Position position;
2489 public:
2490         UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2491         }
2492         UTF8Iterator(const UTF8Iterator &other) {
2493                 doc = other.doc;
2494                 position = other.position;
2495         }
2496         UTF8Iterator &operator=(const UTF8Iterator &other) {
2497                 if (this != &other) {
2498                         doc = other.doc;
2499                         position = other.position;
2500                 }
2501                 return *this;
2502         }
2503         wchar_t operator*() const {
2504                 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2505                 return charExtracted.character;
2506         }
2507         UTF8Iterator &operator++() {
2508                 position = doc->NextPosition(position, 1);
2509                 return *this;
2510         }
2511         UTF8Iterator operator++(int) {
2512                 UTF8Iterator retVal(*this);
2513                 position = doc->NextPosition(position, 1);
2514                 return retVal;
2515         }
2516         UTF8Iterator &operator--() {
2517                 position = doc->NextPosition(position, -1);
2518                 return *this;
2519         }
2520         bool operator==(const UTF8Iterator &other) const {
2521                 return doc == other.doc && position == other.position;
2522         }
2523         bool operator!=(const UTF8Iterator &other) const {
2524                 return doc != other.doc || position != other.position;
2525         }
2526         int Pos() const {
2527                 return position;
2528         }
2529         int PosRoundUp() const {
2530                 return position;
2531         }
2532 };
2533
2534 #endif
2535
2536 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2537         std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2538         if (!doc->IsLineStartPosition(startPos))
2539                 flagsMatch |= std::regex_constants::match_not_bol;
2540         if (!doc->IsLineEndPosition(endPos))
2541                 flagsMatch |= std::regex_constants::match_not_eol;
2542         return flagsMatch;
2543 }
2544
2545 template<typename Iterator, typename Regex>
2546 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2547         bool matched = false;
2548         std::match_results<Iterator> match;
2549
2550         // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2551         // If they didn't then the line by line iteration could be removed for the forwards
2552         // case and replaced with the following 4 lines:
2553         //      Iterator uiStart(doc, startPos);
2554         //      Iterator uiEnd(doc, endPos);
2555         //      flagsMatch = MatchFlags(doc, startPos, endPos);
2556         //      matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2557
2558         // Line by line.
2559         for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2560                 const Range lineRange = resr.LineRange(line);
2561                 Iterator itStart(doc, lineRange.start);
2562                 Iterator itEnd(doc, lineRange.end);
2563                 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2564                 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2565                 // Check for the last match on this line.
2566                 if (matched) {
2567                         if (resr.increment == -1) {
2568                                 while (matched) {
2569                                         Iterator itNext(doc, match[0].second.PosRoundUp());
2570                                         flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2571                                         std::match_results<Iterator> matchNext;
2572                                         matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2573                                         if (matched) {
2574                                                 if (match[0].first == match[0].second) {
2575                                                         // Empty match means failure so exit
2576                                                         return false;
2577                                                 }
2578                                                 match = matchNext;
2579                                         }
2580                                 }
2581                                 matched = true;
2582                         }
2583                         break;
2584                 }
2585         }
2586         if (matched) {
2587                 for (size_t co = 0; co < match.size(); co++) {
2588                         search.bopat[co] = match[co].first.Pos();
2589                         search.eopat[co] = match[co].second.PosRoundUp();
2590                         size_t lenMatch = search.eopat[co] - search.bopat[co];
2591                         search.pat[co].resize(lenMatch);
2592                         for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2593                                 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2594                         }
2595                 }
2596         }
2597         return matched;
2598 }
2599
2600 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2601         bool caseSensitive, int *length, RESearch &search) {
2602         const RESearchRange resr(doc, minPos, maxPos);
2603         try {
2604                 //ElapsedTime et;
2605                 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2606                 // Flags that apper to have no effect:
2607                 // | std::regex::collate | std::regex::extended;
2608                 if (!caseSensitive)
2609                         flagsRe = flagsRe | std::regex::icase;
2610
2611                 // Clear the RESearch so can fill in matches
2612                 search.Clear();
2613
2614                 bool matched = false;
2615                 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2616                         unsigned int lenS = static_cast<unsigned int>(strlen(s));
2617                         std::vector<wchar_t> ws(lenS + 1);
2618 #if WCHAR_T_IS_16
2619                         size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2620 #else
2621                         size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2622 #endif
2623                         ws[outLen] = 0;
2624                         std::wregex regexp;
2625 #if defined(__APPLE__)
2626                         // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2627                         // is one byte not one character.
2628                         // However, on OS X this makes wregex act as Unicode
2629                         std::locale localeU("en_US.UTF-8");
2630                         regexp.imbue(localeU);
2631 #endif
2632                         regexp.assign(&ws[0], flagsRe);
2633                         matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2634
2635                 } else {
2636                         std::regex regexp;
2637                         regexp.assign(s, flagsRe);
2638                         matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2639                 }
2640
2641                 int posMatch = -1;
2642                 if (matched) {
2643                         posMatch = search.bopat[0];
2644                         *length = search.eopat[0] - search.bopat[0];
2645                 }
2646                 // Example - search in doc/ScintillaHistory.html for
2647                 // [[:upper:]]eta[[:space:]]
2648                 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2649                 //double durSearch = et.Duration(true);
2650                 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2651                 return posMatch;
2652         } catch (std::regex_error &) {
2653                 // Failed to create regular expression
2654                 throw RegexError();
2655         } catch (...) {
2656                 // Failed in some other way
2657                 return -1;
2658         }
2659 }
2660
2661 #endif
2662
2663 }
2664
2665 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2666                         bool caseSensitive, bool, bool, int flags,
2667                         int *length) {
2668
2669 #ifdef CXX11_REGEX
2670         if (flags & SCFIND_CXX11REGEX) {
2671                         return Cxx11RegexFindText(doc, minPos, maxPos, s,
2672                         caseSensitive, length, search);
2673         }
2674 #endif
2675
2676         const RESearchRange resr(doc, minPos, maxPos);
2677
2678         const bool posix = (flags & SCFIND_POSIX) != 0;
2679
2680         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2681         if (errmsg) {
2682                 return -1;
2683         }
2684         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2685         // Replace first '.' with '-' in each property file variable reference:
2686         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2687         //     Replace: $(\1-\2)
2688         int pos = -1;
2689         int lenRet = 0;
2690         const char searchEnd = s[*length - 1];
2691         const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2692         for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2693                 int startOfLine = doc->LineStart(line);
2694                 int endOfLine = doc->LineEnd(line);
2695                 if (resr.increment == 1) {
2696                         if (line == resr.lineRangeStart) {
2697                                 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2698                                         continue;       // Can't match start of line if start position after start of line
2699                                 startOfLine = resr.startPos;
2700                         }
2701                         if (line == resr.lineRangeEnd) {
2702                                 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2703                                         continue;       // Can't match end of line if end position before end of line
2704                                 endOfLine = resr.endPos;
2705                         }
2706                 } else {
2707                         if (line == resr.lineRangeEnd) {
2708                                 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2709                                         continue;       // Can't match start of line if end position after start of line
2710                                 startOfLine = resr.endPos;
2711                         }
2712                         if (line == resr.lineRangeStart) {
2713                                 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2714                                         continue;       // Can't match end of line if start position before end of line
2715                                 endOfLine = resr.startPos;
2716                         }
2717                 }
2718
2719                 DocumentIndexer di(doc, endOfLine);
2720                 int success = search.Execute(di, startOfLine, endOfLine);
2721                 if (success) {
2722                         pos = search.bopat[0];
2723                         // Ensure only whole characters selected
2724                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2725                         lenRet = search.eopat[0] - search.bopat[0];
2726                         // There can be only one start of a line, so no need to look for last match in line
2727                         if ((resr.increment == -1) && (s[0] != '^')) {
2728                                 // Check for the last match on this line.
2729                                 int repetitions = 1000; // Break out of infinite loop
2730                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2731                                         success = search.Execute(di, pos+1, endOfLine);
2732                                         if (success) {
2733                                                 if (search.eopat[0] <= minPos) {
2734                                                         pos = search.bopat[0];
2735                                                         lenRet = search.eopat[0] - search.bopat[0];
2736                                                 } else {
2737                                                         success = 0;
2738                                                 }
2739                                         }
2740                                 }
2741                         }
2742                         break;
2743                 }
2744         }
2745         *length = lenRet;
2746         return pos;
2747 }
2748
2749 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2750         substituted.clear();
2751         DocumentIndexer di(doc, doc->Length());
2752         search.GrabMatches(di);
2753         for (int j = 0; j < *length; j++) {
2754                 if (text[j] == '\\') {
2755                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2756                                 unsigned int patNum = text[j + 1] - '0';
2757                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2758                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
2759                                         substituted.append(search.pat[patNum].c_str(), len);
2760                                 j++;
2761                         } else {
2762                                 j++;
2763                                 switch (text[j]) {
2764                                 case 'a':
2765                                         substituted.push_back('\a');
2766                                         break;
2767                                 case 'b':
2768                                         substituted.push_back('\b');
2769                                         break;
2770                                 case 'f':
2771                                         substituted.push_back('\f');
2772                                         break;
2773                                 case 'n':
2774                                         substituted.push_back('\n');
2775                                         break;
2776                                 case 'r':
2777                                         substituted.push_back('\r');
2778                                         break;
2779                                 case 't':
2780                                         substituted.push_back('\t');
2781                                         break;
2782                                 case 'v':
2783                                         substituted.push_back('\v');
2784                                         break;
2785                                 case '\\':
2786                                         substituted.push_back('\\');
2787                                         break;
2788                                 default:
2789                                         substituted.push_back('\\');
2790                                         j--;
2791                                 }
2792                         }
2793                 } else {
2794                         substituted.push_back(text[j]);
2795                 }
2796         }
2797         *length = static_cast<int>(substituted.length());
2798         return substituted.c_str();
2799 }
2800
2801 #ifndef SCI_OWNREGEX
2802
2803 #ifdef SCI_NAMESPACE
2804
2805 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2806         return new BuiltinRegex(charClassTable);
2807 }
2808
2809 #else
2810
2811 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2812         return new BuiltinRegex(charClassTable);
2813 }
2814
2815 #endif
2816
2817 #endif