scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <cstddef>
   9 #include <cstdlib>
  10 #include <cassert>
  11 #include <cstring>
  12 #include <cstdio>
  13 #include <cmath>
  14
  15 #include <stdexcept>
  16 #include <string>
  17 #include <vector>
  18 #include <forward_list>
  19 #include <algorithm>
  20 #include <memory>
  21 #include <chrono>
  22
  23 #ifndef NO_CXX11_REGEX
  24 #include <regex>
  25 #endif
  26
  27 #include "Platform.h"
  28
  29 #include "ILoader.h"
  30 #include "ILexer.h"
  31 #include "Scintilla.h"
  32
  33 #include "CharacterSet.h"
  34 #include "CharacterCategory.h"
  35 #include "Position.h"
  36 #include "SplitVector.h"
  37 #include "Partitioning.h"
  38 #include "RunStyles.h"
  39 #include "CellBuffer.h"
  40 #include "PerLine.h"
  41 #include "CharClassify.h"
  42 #include "Decoration.h"
  43 #include "CaseFolder.h"
  44 #include "Document.h"
  45 #include "RESearch.h"
  46 #include "UniConversion.h"
  47 #include "ElapsedPeriod.h"
  48
  49 using namespace Scintilla;
  50
  51 void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
  52         if (pdoc && instance && !performingStyle) {
  53                 // Protect against reentrance, which may occur, for example, when
  54                 // fold points are discovered while performing styling and the folding
  55                 // code looks for child lines which may trigger styling.
  56                 performingStyle = true;
  57
  58                 const Sci::Position lengthDoc = pdoc->Length();
  59                 if (end == -1)
  60                         end = lengthDoc;
  61                 const Sci::Position len = end - start;
  62
  63                 PLATFORM_ASSERT(len >= 0);
  64                 PLATFORM_ASSERT(start + len <= lengthDoc);
  65
  66                 int styleStart = 0;
  67                 if (start > 0)
  68                         styleStart = pdoc->StyleAt(start - 1);
  69
  70                 if (len > 0) {
  71                         instance->Lex(start, len, styleStart, pdoc);
  72                         instance->Fold(start, len, styleStart, pdoc);
  73                 }
  74
  75                 performingStyle = false;
  76         }
  77 }
  78
  79 int LexInterface::LineEndTypesSupported() {
  80         if (instance) {
  81                 const int interfaceVersion = instance->Version();
  82                 if (interfaceVersion >= lvSubStyles) {
  83                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  84                         return ssinstance->LineEndTypesSupported();
  85                 }
  86         }
  87         return 0;
  88 }
  89
  90 ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
  91         duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
  92 }
  93
  94 void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
  95         // Only adjust for multiple actions to avoid instability
  96         if (numberActions < 8)
  97                 return;
  98
  99         // Alpha value for exponential smoothing.
 100         // Most recent value contributes 25% to smoothed value.
 101         const double alpha = 0.25;
 102
 103         const double durationOne = durationOfActions / numberActions;
 104         duration = Sci::clamp(alpha * durationOne + (1.0 - alpha) * duration,
 105                 minDuration, maxDuration);
 106 }
 107
 108 double ActionDuration::Duration() const noexcept {
 109         return duration;
 110 }
 111
 112 Document::Document(int options) :
 113         cb((options & SC_DOCUMENTOPTION_STYLES_NONE) == 0, (options & SC_DOCUMENTOPTION_TEXT_LARGE) != 0),
 114         durationStyleOneLine(0.00001, 0.000001, 0.0001) {
 115         refCount = 0;
 116 #ifdef _WIN32
 117         eolMode = SC_EOL_CRLF;
 118 #else
 119         eolMode = SC_EOL_LF;
 120 #endif
 121         dbcsCodePage = SC_CP_UTF8;
 122         lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
 123         endStyled = 0;
 124         styleClock = 0;
 125         enteredModification = 0;
 126         enteredStyling = 0;
 127         enteredReadOnlyCount = 0;
 128         insertionSet = false;
 129         tabInChars = 8;
 130         indentInChars = 0;
 131         actualIndentInChars = 8;
 132         useTabs = true;
 133         tabIndents = true;
 134         backspaceUnindents = false;
 135
 136         matchesValid = false;
 137
 138         perLineData[ldMarkers].reset(new LineMarkers());
 139         perLineData[ldLevels].reset(new LineLevels());
 140         perLineData[ldState].reset(new LineState());
 141         perLineData[ldMargin].reset(new LineAnnotation());
 142         perLineData[ldAnnotation].reset(new LineAnnotation());
 143
 144         decorations = DecorationListCreate(IsLarge());
 145
 146         cb.SetPerLine(this);
 147         cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
 148 }
 149
 150 Document::~Document() {
 151         for (const WatcherWithUserData &watcher : watchers) {
 152                 watcher.watcher->NotifyDeleted(this, watcher.userData);
 153         }
 154 }
 155
 156 // Increase reference count and return its previous value.
 157 int Document::AddRef() {
 158         return refCount++;
 159 }
 160
 161 // Decrease reference count and return its previous value.
 162 // Delete the document if reference count reaches zero.
 163 int SCI_METHOD Document::Release() {
 164         const int curRefCount = --refCount;
 165         if (curRefCount == 0)
 166                 delete this;
 167         return curRefCount;
 168 }
 169
 170 void Document::Init() {
 171         for (const std::unique_ptr<PerLine> &pl : perLineData) {
 172                 if (pl)
 173                         pl->Init();
 174         }
 175 }
 176
 177 void Document::InsertLine(Sci::Line line) {
 178         for (const std::unique_ptr<PerLine> &pl : perLineData) {
 179                 if (pl)
 180                         pl->InsertLine(line);
 181         }
 182 }
 183
 184 void Document::RemoveLine(Sci::Line line) {
 185         for (const std::unique_ptr<PerLine> &pl : perLineData) {
 186                 if (pl)
 187                         pl->RemoveLine(line);
 188         }
 189 }
 190
 191 LineMarkers *Document::Markers() const {
 192         return static_cast<LineMarkers *>(perLineData[ldMarkers].get());
 193 }
 194
 195 LineLevels *Document::Levels() const {
 196         return static_cast<LineLevels *>(perLineData[ldLevels].get());
 197 }
 198
 199 LineState *Document::States() const {
 200         return static_cast<LineState *>(perLineData[ldState].get());
 201 }
 202
 203 LineAnnotation *Document::Margins() const {
 204         return static_cast<LineAnnotation *>(perLineData[ldMargin].get());
 205 }
 206
 207 LineAnnotation *Document::Annotations() const {
 208         return static_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
 209 }
 210
 211 int Document::LineEndTypesSupported() const {
 212         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 213                 return pli->LineEndTypesSupported();
 214         else
 215                 return 0;
 216 }
 217
 218 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 219         if (dbcsCodePage != dbcsCodePage_) {
 220                 dbcsCodePage = dbcsCodePage_;
 221                 SetCaseFolder(nullptr);
 222                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 223                 cb.SetUTF8Substance(SC_CP_UTF8 == dbcsCodePage);
 224                 ModifiedAt(0);  // Need to restyle whole document
 225                 return true;
 226         } else {
 227                 return false;
 228         }
 229 }
 230
 231 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 232         if (lineEndBitSet != lineEndBitSet_) {
 233                 lineEndBitSet = lineEndBitSet_;
 234                 const int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 235                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 236                         ModifiedAt(0);
 237                         cb.SetLineEndTypes(lineEndBitSetActive);
 238                         return true;
 239                 } else {
 240                         return false;
 241                 }
 242         } else {
 243                 return false;
 244         }
 245 }
 246
 247 void Document::SetSavePoint() {
 248         cb.SetSavePoint();
 249         NotifySavePoint(true);
 250 }
 251
 252 void Document::TentativeUndo() {
 253         if (!TentativeActive())
 254                 return;
 255         CheckReadOnly();
 256         if (enteredModification == 0) {
 257                 enteredModification++;
 258                 if (!cb.IsReadOnly()) {
 259                         const bool startSavePoint = cb.IsSavePoint();
 260                         bool multiLine = false;
 261                         const int steps = cb.TentativeSteps();
 262                         //Platform::DebugPrintf("Steps=%d\n", steps);
 263                         for (int step = 0; step < steps; step++) {
 264                                 const Sci::Line prevLinesTotal = LinesTotal();
 265                                 const Action &action = cb.GetUndoStep();
 266                                 if (action.at == removeAction) {
 267                                         NotifyModified(DocModification(
 268                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
 269                                 } else if (action.at == containerAction) {
 270                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
 271                                         dm.token = action.position;
 272                                         NotifyModified(dm);
 273                                 } else {
 274                                         NotifyModified(DocModification(
 275                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
 276                                 }
 277                                 cb.PerformUndoStep();
 278                                 if (action.at != containerAction) {
 279                                         ModifiedAt(action.position);
 280                                 }
 281
 282                                 int modFlags = SC_PERFORMED_UNDO;
 283                                 // With undo, an insertion action becomes a deletion notification
 284                                 if (action.at == removeAction) {
 285                                         modFlags |= SC_MOD_INSERTTEXT;
 286                                 } else if (action.at == insertAction) {
 287                                         modFlags |= SC_MOD_DELETETEXT;
 288                                 }
 289                                 if (steps > 1)
 290                                         modFlags |= SC_MULTISTEPUNDOREDO;
 291                                 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
 292                                 if (linesAdded != 0)
 293                                         multiLine = true;
 294                                 if (step == steps - 1) {
 295                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 296                                         if (multiLine)
 297                                                 modFlags |= SC_MULTILINEUNDOREDO;
 298                                 }
 299                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
 300                                                                                            linesAdded, action.data.get()));
 301                         }
 302
 303                         const bool endSavePoint = cb.IsSavePoint();
 304                         if (startSavePoint != endSavePoint)
 305                                 NotifySavePoint(endSavePoint);
 306
 307                         cb.TentativeCommit();
 308                 }
 309                 enteredModification--;
 310         }
 311 }
 312
 313 int Document::GetMark(Sci::Line line) const {
 314         return Markers()->MarkValue(line);
 315 }
 316
 317 Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const {
 318         return Markers()->MarkerNext(lineStart, mask);
 319 }
 320
 321 int Document::AddMark(Sci::Line line, int markerNum) {
 322         if (line >= 0 && line <= LinesTotal()) {
 323                 const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
 324                 const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
 325                 NotifyModified(mh);
 326                 return prev;
 327         } else {
 328                 return -1;
 329         }
 330 }
 331
 332 void Document::AddMarkSet(Sci::Line line, int valueSet) {
 333         if (line < 0 || line > LinesTotal()) {
 334                 return;
 335         }
 336         unsigned int m = valueSet;
 337         for (int i = 0; m; i++, m >>= 1) {
 338                 if (m & 1)
 339                         Markers()->AddMark(line, i, LinesTotal());
 340         }
 341         const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
 342         NotifyModified(mh);
 343 }
 344
 345 void Document::DeleteMark(Sci::Line line, int markerNum) {
 346         Markers()->DeleteMark(line, markerNum, false);
 347         const DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, nullptr, line);
 348         NotifyModified(mh);
 349 }
 350
 351 void Document::DeleteMarkFromHandle(int markerHandle) {
 352         Markers()->DeleteMarkFromHandle(markerHandle);
 353         DocModification mh(SC_MOD_CHANGEMARKER);
 354         mh.line = -1;
 355         NotifyModified(mh);
 356 }
 357
 358 void Document::DeleteAllMarks(int markerNum) {
 359         bool someChanges = false;
 360         for (Sci::Line line = 0; line < LinesTotal(); line++) {
 361                 if (Markers()->DeleteMark(line, markerNum, true))
 362                         someChanges = true;
 363         }
 364         if (someChanges) {
 365                 DocModification mh(SC_MOD_CHANGEMARKER);
 366                 mh.line = -1;
 367                 NotifyModified(mh);
 368         }
 369 }
 370
 371 Sci::Line Document::LineFromHandle(int markerHandle) const {
 372         return Markers()->LineFromHandle(markerHandle);
 373 }
 374
 375 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
 376         return cb.LineStart(static_cast<Sci::Line>(line));
 377 }
 378
 379 bool Document::IsLineStartPosition(Sci::Position position) const {
 380         return LineStart(LineFromPosition(position)) == position;
 381 }
 382
 383 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
 384         if (line >= LinesTotal() - 1) {
 385                 return LineStart(line + 1);
 386         } else {
 387                 Sci::Position position = LineStart(line + 1);
 388                 if (SC_CP_UTF8 == dbcsCodePage) {
 389                         const unsigned char bytes[] = {
 390                                 cb.UCharAt(position-3),
 391                                 cb.UCharAt(position-2),
 392                                 cb.UCharAt(position-1),
 393                         };
 394                         if (UTF8IsSeparator(bytes)) {
 395                                 return position - UTF8SeparatorLength;
 396                         }
 397                         if (UTF8IsNEL(bytes+1)) {
 398                                 return position - UTF8NELLength;
 399                         }
 400                 }
 401                 position--; // Back over CR or LF
 402                 // When line terminator is CR+LF, may need to go back one more
 403                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 404                         position--;
 405                 }
 406                 return position;
 407         }
 408 }
 409
 410 void SCI_METHOD Document::SetErrorStatus(int status) {
 411         // Tell the watchers an error has occurred.
 412         for (const WatcherWithUserData &watcher : watchers) {
 413                 watcher.watcher->NotifyErrorOccurred(this, watcher.userData, status);
 414         }
 415 }
 416
 417 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
 418         return cb.LineFromPosition(pos);
 419 }
 420
 421 Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
 422         // Avoids casting in callers for this very common function
 423         return cb.LineFromPosition(pos);
 424 }
 425
 426 Sci::Position Document::LineEndPosition(Sci::Position position) const {
 427         return LineEnd(LineFromPosition(position));
 428 }
 429
 430 bool Document::IsLineEndPosition(Sci::Position position) const {
 431         return LineEnd(LineFromPosition(position)) == position;
 432 }
 433
 434 bool Document::IsPositionInLineEnd(Sci::Position position) const {
 435         return position >= LineEnd(LineFromPosition(position));
 436 }
 437
 438 Sci::Position Document::VCHomePosition(Sci::Position position) const {
 439         const Sci::Line line = SciLineFromPosition(position);
 440         const Sci::Position startPosition = LineStart(line);
 441         const Sci::Position endLine = LineEnd(line);
 442         Sci::Position startText = startPosition;
 443         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 444                 startText++;
 445         if (position == startText)
 446                 return startPosition;
 447         else
 448                 return startText;
 449 }
 450
 451 Sci::Position Document::IndexLineStart(Sci::Line line, int lineCharacterIndex) const {
 452         return cb.IndexLineStart(line, lineCharacterIndex);
 453 }
 454
 455 Sci::Line Document::LineFromPositionIndex(Sci::Position pos, int lineCharacterIndex) const {
 456         return cb.LineFromPositionIndex(pos, lineCharacterIndex);
 457 }
 458
 459 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
 460         const int prev = Levels()->SetLevel(static_cast<Sci::Line>(line), level, LinesTotal());
 461         if (prev != level) {
 462                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 463                                    LineStart(line), 0, 0, nullptr, static_cast<Sci::Line>(line));
 464                 mh.foldLevelNow = level;
 465                 mh.foldLevelPrev = prev;
 466                 NotifyModified(mh);
 467         }
 468         return prev;
 469 }
 470
 471 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
 472         return Levels()->GetLevel(static_cast<Sci::Line>(line));
 473 }
 474
 475 void Document::ClearLevels() {
 476         Levels()->ClearLevels();
 477 }
 478
 479 static bool IsSubordinate(int levelStart, int levelTry) noexcept {
 480         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 481                 return true;
 482         else
 483                 return LevelNumber(levelStart) < LevelNumber(levelTry);
 484 }
 485
 486 Sci::Line Document::GetLastChild(Sci::Line lineParent, int level, Sci::Line lastLine) {
 487         if (level == -1)
 488                 level = LevelNumber(GetLevel(lineParent));
 489         const Sci::Line maxLine = LinesTotal();
 490         const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
 491         Sci::Line lineMaxSubord = lineParent;
 492         while (lineMaxSubord < maxLine - 1) {
 493                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 494                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 495                         break;
 496                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 497                         break;
 498                 lineMaxSubord++;
 499         }
 500         if (lineMaxSubord > lineParent) {
 501                 if (level > LevelNumber(GetLevel(lineMaxSubord + 1))) {
 502                         // Have chewed up some whitespace that belongs to a parent so seek back
 503                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 504                                 lineMaxSubord--;
 505                         }
 506                 }
 507         }
 508         return lineMaxSubord;
 509 }
 510
 511 Sci::Line Document::GetFoldParent(Sci::Line line) const {
 512         const int level = LevelNumber(GetLevel(line));
 513         Sci::Line lineLook = line - 1;
 514         while ((lineLook > 0) && (
 515                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 516                     (LevelNumber(GetLevel(lineLook)) >= level))
 517               ) {
 518                 lineLook--;
 519         }
 520         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 521                 (LevelNumber(GetLevel(lineLook)) < level)) {
 522                 return lineLook;
 523         } else {
 524                 return -1;
 525         }
 526 }
 527
 528 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
 529         const int level = GetLevel(line);
 530         const Sci::Line lookLastLine = std::max(line, lastLine) + 1;
 531
 532         Sci::Line lookLine = line;
 533         int lookLineLevel = level;
 534         int lookLineLevelNum = LevelNumber(lookLineLevel);
 535         while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
 536                 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= LevelNumber(GetLevel(lookLine + 1)))))) {
 537                 lookLineLevel = GetLevel(--lookLine);
 538                 lookLineLevelNum = LevelNumber(lookLineLevel);
 539         }
 540
 541         Sci::Line beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
 542         if (beginFoldBlock == -1) {
 543                 highlightDelimiter.Clear();
 544                 return;
 545         }
 546
 547         Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
 548         Sci::Line firstChangeableLineBefore = -1;
 549         if (endFoldBlock < line) {
 550                 lookLine = beginFoldBlock - 1;
 551                 lookLineLevel = GetLevel(lookLine);
 552                 lookLineLevelNum = LevelNumber(lookLineLevel);
 553                 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
 554                         if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
 555                                 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
 556                                         beginFoldBlock = lookLine;
 557                                         endFoldBlock = line;
 558                                         firstChangeableLineBefore = line - 1;
 559                                 }
 560                         }
 561                         if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && (LevelNumber(GetLevel(lookLine - 1)) > lookLineLevelNum))
 562                                 break;
 563                         lookLineLevel = GetLevel(--lookLine);
 564                         lookLineLevelNum = LevelNumber(lookLineLevel);
 565                 }
 566         }
 567         if (firstChangeableLineBefore == -1) {
 568                 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
 569                         lookLine >= beginFoldBlock;
 570                         lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
 571                         if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > LevelNumber(level))) {
 572                                 firstChangeableLineBefore = lookLine;
 573                                 break;
 574                         }
 575                 }
 576         }
 577         if (firstChangeableLineBefore == -1)
 578                 firstChangeableLineBefore = beginFoldBlock - 1;
 579
 580         Sci::Line firstChangeableLineAfter = -1;
 581         for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = LevelNumber(lookLineLevel);
 582                 lookLine <= endFoldBlock;
 583                 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = LevelNumber(lookLineLevel)) {
 584                 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < LevelNumber(GetLevel(lookLine + 1)))) {
 585                         firstChangeableLineAfter = lookLine;
 586                         break;
 587                 }
 588         }
 589         if (firstChangeableLineAfter == -1)
 590                 firstChangeableLineAfter = endFoldBlock + 1;
 591
 592         highlightDelimiter.beginFoldBlock = beginFoldBlock;
 593         highlightDelimiter.endFoldBlock = endFoldBlock;
 594         highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
 595         highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
 596 }
 597
 598 Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const {
 599         return Sci::clamp(pos, static_cast<Sci::Position>(0), static_cast<Sci::Position>(Length()));
 600 }
 601
 602 bool Document::IsCrLf(Sci::Position pos) const {
 603         if (pos < 0)
 604                 return false;
 605         if (pos >= (Length() - 1))
 606                 return false;
 607         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 608 }
 609
 610 int Document::LenChar(Sci::Position pos) {
 611         if (pos < 0) {
 612                 return 1;
 613         } else if (IsCrLf(pos)) {
 614                 return 2;
 615         } else if (SC_CP_UTF8 == dbcsCodePage) {
 616                 const unsigned char leadByte = cb.UCharAt(pos);
 617                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 618                 const Sci::Position lengthDoc = Length();
 619                 if ((pos + widthCharBytes) > lengthDoc)
 620                         return static_cast<int>(lengthDoc - pos);
 621                 else
 622                         return widthCharBytes;
 623         } else if (dbcsCodePage) {
 624                 return IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
 625         } else {
 626                 return 1;
 627         }
 628 }
 629
 630 bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
 631         Sci::Position trail = pos;
 632         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
 633                 trail--;
 634         start = (trail > 0) ? trail-1 : trail;
 635
 636         const unsigned char leadByte = cb.UCharAt(start);
 637         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 638         if (widthCharBytes == 1) {
 639                 return false;
 640         } else {
 641                 const int trailBytes = widthCharBytes - 1;
 642                 const Sci::Position len = pos - start;
 643                 if (len > trailBytes)
 644                         // pos too far from lead
 645                         return false;
 646                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 647                 for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
 648                         charBytes[b] = cb.CharAt(start+b);
 649                 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 650                 if (utf8status & UTF8MaskInvalid)
 651                         return false;
 652                 end = start + widthCharBytes;
 653                 return true;
 654         }
 655 }
 656
 657 // Normalise a position so that it is not halfway through a two byte character.
 658 // This can occur in two situations -
 659 // When lines are terminated with \r\n pairs which should be treated as one character.
 660 // When displaying DBCS text such as Japanese.
 661 // If moving, move the position in the indicated direction.
 662 Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const {
 663         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 664         // If out of range, just return minimum/maximum value.
 665         if (pos <= 0)
 666                 return 0;
 667         if (pos >= Length())
 668                 return Length();
 669
 670         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 671         if (checkLineEnd && IsCrLf(pos - 1)) {
 672                 if (moveDir > 0)
 673                         return pos + 1;
 674                 else
 675                         return pos - 1;
 676         }
 677
 678         if (dbcsCodePage) {
 679                 if (SC_CP_UTF8 == dbcsCodePage) {
 680                         const unsigned char ch = cb.UCharAt(pos);
 681                         // If ch is not a trail byte then pos is valid intercharacter position
 682                         if (UTF8IsTrailByte(ch)) {
 683                                 Sci::Position startUTF = pos;
 684                                 Sci::Position endUTF = pos;
 685                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 686                                         // ch is a trail byte within a UTF-8 character
 687                                         if (moveDir > 0)
 688                                                 pos = endUTF;
 689                                         else
 690                                                 pos = startUTF;
 691                                 }
 692                                 // Else invalid UTF-8 so return position of isolated trail byte
 693                         }
 694                 } else {
 695                         // Anchor DBCS calculations at start of line because start of line can
 696                         // not be a DBCS trail byte.
 697                         const Sci::Position posStartLine = LineStart(LineFromPosition(pos));
 698                         if (pos == posStartLine)
 699                                 return pos;
 700
 701                         // Step back until a non-lead-byte is found.
 702                         Sci::Position posCheck = pos;
 703                         while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
 704                                 posCheck--;
 705
 706                         // Check from known start of character.
 707                         while (posCheck < pos) {
 708                                 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(posCheck)) ? 2 : 1;
 709                                 if (posCheck + mbsize == pos) {
 710                                         return pos;
 711                                 } else if (posCheck + mbsize > pos) {
 712                                         if (moveDir > 0) {
 713                                                 return posCheck + mbsize;
 714                                         } else {
 715                                                 return posCheck;
 716                                         }
 717                                 }
 718                                 posCheck += mbsize;
 719                         }
 720                 }
 721         }
 722
 723         return pos;
 724 }
 725
 726 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 727 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 728 // A \r\n pair is treated as two characters.
 729 Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
 730         // If out of range, just return minimum/maximum value.
 731         const int increment = (moveDir > 0) ? 1 : -1;
 732         if (pos + increment <= 0)
 733                 return 0;
 734         if (pos + increment >= cb.Length())
 735                 return cb.Length();
 736
 737         if (dbcsCodePage) {
 738                 if (SC_CP_UTF8 == dbcsCodePage) {
 739                         if (increment == 1) {
 740                                 // Simple forward movement case so can avoid some checks
 741                                 const unsigned char leadByte = cb.UCharAt(pos);
 742                                 if (UTF8IsAscii(leadByte)) {
 743                                         // Single byte character or invalid
 744                                         pos++;
 745                                 } else {
 746                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 747                                         unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 748                                         for (int b=1; b<widthCharBytes; b++)
 749                                                 charBytes[b] = cb.CharAt(pos+b);
 750                                         const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 751                                         if (utf8status & UTF8MaskInvalid)
 752                                                 pos++;
 753                                         else
 754                                                 pos += utf8status & UTF8MaskWidth;
 755                                 }
 756                         } else {
 757                                 // Examine byte before position
 758                                 pos--;
 759                                 const unsigned char ch = cb.UCharAt(pos);
 760                                 // If ch is not a trail byte then pos is valid intercharacter position
 761                                 if (UTF8IsTrailByte(ch)) {
 762                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 763                                         Sci::Position startUTF = pos;
 764                                         Sci::Position endUTF = pos;
 765                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 766                                                 pos = startUTF;
 767                                         }
 768                                         // Else invalid UTF-8 so return position of isolated trail byte
 769                                 }
 770                         }
 771                 } else {
 772                         if (moveDir > 0) {
 773                                 const int mbsize = IsDBCSLeadByteNoExcept(cb.CharAt(pos)) ? 2 : 1;
 774                                 pos += mbsize;
 775                                 if (pos > cb.Length())
 776                                         pos = cb.Length();
 777                         } else {
 778                                 // Anchor DBCS calculations at start of line because start of line can
 779                                 // not be a DBCS trail byte.
 780                                 const Sci::Position posStartLine = cb.LineStart(cb.LineFromPosition(pos));
 781                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 782                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 783                                 if ((pos - 1) <= posStartLine) {
 784                                         return pos - 1;
 785                                 } else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
 786                                         // Must actually be trail byte
 787                                         return pos - 2;
 788                                 } else {
 789                                         // Otherwise, step back until a non-lead-byte is found.
 790                                         Sci::Position posTemp = pos - 1;
 791                                         while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
 792                                                 ;
 793                                         // Now posTemp+1 must point to the beginning of a character,
 794                                         // so figure out whether we went back an even or an odd
 795                                         // number of bytes and go back 1 or 2 bytes, respectively.
 796                                         return (pos - 1 - ((pos - posTemp) & 1));
 797                                 }
 798                         }
 799                 }
 800         } else {
 801                 pos += increment;
 802         }
 803
 804         return pos;
 805 }
 806
 807 bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
 808         // Returns true if pos changed
 809         Sci::Position posNext = NextPosition(pos, moveDir);
 810         if (posNext == pos) {
 811                 return false;
 812         } else {
 813                 pos = posNext;
 814                 return true;
 815         }
 816 }
 817
 818 Document::CharacterExtracted Document::CharacterAfter(Sci::Position position) const {
 819         if (position >= Length()) {
 820                 return CharacterExtracted(unicodeReplacementChar, 0);
 821         }
 822         const unsigned char leadByte = cb.UCharAt(position);
 823         if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
 824                 // Common case: ASCII character
 825                 return CharacterExtracted(leadByte, 1);
 826         }
 827         if (SC_CP_UTF8 == dbcsCodePage) {
 828                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 829                 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
 830                 for (int b = 1; b<widthCharBytes; b++)
 831                         charBytes[b] = cb.UCharAt(position + b);
 832                 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 833                 if (utf8status & UTF8MaskInvalid) {
 834                         // Treat as invalid and use up just one byte
 835                         return CharacterExtracted(unicodeReplacementChar, 1);
 836                 } else {
 837                         return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
 838                 }
 839         } else {
 840                 if (IsDBCSLeadByteNoExcept(leadByte) && ((position + 1) < Length())) {
 841                         return CharacterExtracted::DBCS(leadByte, cb.UCharAt(position + 1));
 842                 } else {
 843                         return CharacterExtracted(leadByte, 1);
 844                 }
 845         }
 846 }
 847
 848 Document::CharacterExtracted Document::CharacterBefore(Sci::Position position) const {
 849         if (position <= 0) {
 850                 return CharacterExtracted(unicodeReplacementChar, 0);
 851         }
 852         const unsigned char previousByte = cb.UCharAt(position - 1);
 853         if (0 == dbcsCodePage) {
 854                 return CharacterExtracted(previousByte, 1);
 855         }
 856         if (SC_CP_UTF8 == dbcsCodePage) {
 857                 if (UTF8IsAscii(previousByte)) {
 858                         return CharacterExtracted(previousByte, 1);
 859                 }
 860                 position--;
 861                 // If previousByte is not a trail byte then its invalid
 862                 if (UTF8IsTrailByte(previousByte)) {
 863                         // If previousByte is a trail byte in a valid UTF-8 character then find start of character
 864                         Sci::Position startUTF = position;
 865                         Sci::Position endUTF = position;
 866                         if (InGoodUTF8(position, startUTF, endUTF)) {
 867                                 const int widthCharBytes = static_cast<int>(endUTF - startUTF);
 868                                 unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
 869                                 for (int b = 0; b<widthCharBytes; b++)
 870                                         charBytes[b] = cb.UCharAt(startUTF + b);
 871                                 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 872                                 if (utf8status & UTF8MaskInvalid) {
 873                                         // Treat as invalid and use up just one byte
 874                                         return CharacterExtracted(unicodeReplacementChar, 1);
 875                                 } else {
 876                                         return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
 877                                 }
 878                         }
 879                         // Else invalid UTF-8 so return position of isolated trail byte
 880                 }
 881                 return CharacterExtracted(unicodeReplacementChar, 1);
 882         } else {
 883                 // Moving backwards in DBCS is complex so use NextPosition
 884                 const Sci::Position posStartCharacter = NextPosition(position, -1);
 885                 return CharacterAfter(posStartCharacter);
 886         }
 887 }
 888
 889 // Return -1  on out-of-bounds
 890 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
 891         Sci::Position pos = positionStart;
 892         if (dbcsCodePage) {
 893                 const int increment = (characterOffset > 0) ? 1 : -1;
 894                 while (characterOffset != 0) {
 895                         const Sci::Position posNext = NextPosition(pos, increment);
 896                         if (posNext == pos)
 897                                 return INVALID_POSITION;
 898                         pos = posNext;
 899                         characterOffset -= increment;
 900                 }
 901         } else {
 902                 pos = positionStart + characterOffset;
 903                 if ((pos < 0) || (pos > Length()))
 904                         return INVALID_POSITION;
 905         }
 906         return pos;
 907 }
 908
 909 Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const {
 910         Sci::Position pos = positionStart;
 911         if (dbcsCodePage) {
 912                 const int increment = (characterOffset > 0) ? 1 : -1;
 913                 while (characterOffset != 0) {
 914                         const Sci::Position posNext = NextPosition(pos, increment);
 915                         if (posNext == pos)
 916                                 return INVALID_POSITION;
 917                         if (std::abs(pos-posNext) > 3)  // 4 byte character = 2*UTF16.
 918                                 characterOffset -= increment;
 919                         pos = posNext;
 920                         characterOffset -= increment;
 921                 }
 922         } else {
 923                 pos = positionStart + characterOffset;
 924                 if ((pos < 0) || (pos > Length()))
 925                         return INVALID_POSITION;
 926         }
 927         return pos;
 928 }
 929
 930 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
 931         int character;
 932         int bytesInCharacter = 1;
 933         const unsigned char leadByte = cb.UCharAt(position);
 934         if (dbcsCodePage) {
 935                 if (SC_CP_UTF8 == dbcsCodePage) {
 936                         if (UTF8IsAscii(leadByte)) {
 937                                 // Single byte character or invalid
 938                                 character =  leadByte;
 939                         } else {
 940                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 941                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 942                                 for (int b=1; b<widthCharBytes; b++)
 943                                         charBytes[b] = cb.UCharAt(position+b);
 944                                 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 945                                 if (utf8status & UTF8MaskInvalid) {
 946                                         // Report as singleton surrogate values which are invalid Unicode
 947                                         character =  0xDC80 + leadByte;
 948                                 } else {
 949                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 950                                         character = UnicodeFromUTF8(charBytes);
 951                                 }
 952                         }
 953                 } else {
 954                         if (IsDBCSLeadByteNoExcept(leadByte)) {
 955                                 bytesInCharacter = 2;
 956                                 character = (leadByte << 8) | cb.UCharAt(position+1);
 957                         } else {
 958                                 character = leadByte;
 959                         }
 960                 }
 961         } else {
 962                 character = leadByte;
 963         }
 964         if (pWidth) {
 965                 *pWidth = bytesInCharacter;
 966         }
 967         return character;
 968 }
 969
 970 int SCI_METHOD Document::CodePage() const {
 971         return dbcsCodePage;
 972 }
 973
 974 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 975         // Used by lexers so must match IDocument method exactly
 976         return IsDBCSLeadByteNoExcept(ch);
 977 }
 978
 979 bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
 980         // Used inside core Scintilla
 981         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 982         const unsigned char uch = ch;
 983         switch (dbcsCodePage) {
 984                 case 932:
 985                         // Shift_jis
 986                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 987                                 ((uch >= 0xE0) && (uch <= 0xFC));
 988                                 // Lead bytes F0 to FC may be a Microsoft addition.
 989                 case 936:
 990                         // GBK
 991                         return (uch >= 0x81) && (uch <= 0xFE);
 992                 case 949:
 993                         // Korean Wansung KS C-5601-1987
 994                         return (uch >= 0x81) && (uch <= 0xFE);
 995                 case 950:
 996                         // Big5
 997                         return (uch >= 0x81) && (uch <= 0xFE);
 998                 case 1361:
 999                         // Korean Johab KS C-5601-1992
1000                         return
1001                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
1002                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
1003                                 ((uch >= 0xE0) && (uch <= 0xF9));
1004         }
1005         return false;
1006 }
1007
1008 bool Document::IsDBCSLeadByteInvalid(char ch) const noexcept {
1009         const unsigned char lead = ch;
1010         switch (dbcsCodePage) {
1011         case 932:
1012                 // Shift_jis
1013                 return
1014                         (lead == 0x85) ||
1015                         (lead == 0x86) ||
1016                         (lead == 0xEB) ||
1017                         (lead == 0xEC) ||
1018                         (lead == 0xEF) ||
1019                         (lead == 0xFA) ||
1020                         (lead == 0xFB) ||
1021                         (lead == 0xFC);
1022         case 936:
1023                 // GBK
1024                 return (lead == 0x80) || (lead == 0xFF);
1025         case 949:
1026                 // Korean Wansung KS C-5601-1987
1027                 return (lead == 0x80) || (lead == 0xC9) || (lead >= 0xFE);
1028         case 950:
1029                 // Big5
1030                 return
1031                         ((lead >= 0x80) && (lead <= 0xA0)) ||
1032                         (lead == 0xC8) ||
1033                         (lead >= 0xFA);
1034         case 1361:
1035                 // Korean Johab KS C-5601-1992
1036                 return
1037                         ((lead >= 0x80) && (lead <= 0x83)) ||
1038                         ((lead >= 0xD4) && (lead <= 0xD8)) ||
1039                         (lead == 0xDF) ||
1040                         (lead >= 0xFA);
1041         }
1042         return false;
1043 }
1044
1045 bool Document::IsDBCSTrailByteInvalid(char ch) const noexcept {
1046         const unsigned char trail = ch;
1047         switch (dbcsCodePage) {
1048         case 932:
1049                 // Shift_jis
1050                 return
1051                         (trail <= 0x3F) ||
1052                         (trail == 0x7F) ||
1053                         (trail >= 0xFD);
1054         case 936:
1055                 // GBK
1056                 return
1057                         (trail <= 0x3F) ||
1058                         (trail == 0x7F) ||
1059                         (trail == 0xFF);
1060         case 949:
1061                 // Korean Wansung KS C-5601-1987
1062                 return
1063                         (trail <= 0x40) ||
1064                         ((trail >= 0x5B) && (trail <= 0x60)) ||
1065                         ((trail >= 0x7B) && (trail <= 0x80)) ||
1066                         (trail == 0xFF);
1067         case 950:
1068                 // Big5
1069                 return
1070                         (trail <= 0x3F) ||
1071                         ((trail >= 0x7F) && (trail <= 0xA0)) ||
1072                         (trail == 0xFF);
1073         case 1361:
1074                 // Korean Johab KS C-5601-1992
1075                 return
1076                         (trail <= 0x30) ||
1077                         (trail == 0x7F) ||
1078                         (trail == 0x80) ||
1079                         (trail == 0xFF);
1080         }
1081         return false;
1082 }
1083
1084 int Document::DBCSDrawBytes(const char *text, int len) const noexcept {
1085         if (len <= 1) {
1086                 return len;
1087         }
1088         if (IsDBCSLeadByteNoExcept(text[0])) {
1089                 return IsDBCSTrailByteInvalid(text[1]) ? 1 : 2;
1090         } else {
1091                 return 1;
1092         }
1093 }
1094
// True when ch is a space or a horizontal tab.
static constexpr bool IsSpaceOrTab(int ch) noexcept {
        return (ch == '\t') || (ch == ' ');
}
1098
1099 // Need to break text into segments near lengthSegment but taking into
1100 // account the encoding to not break inside a UTF-8 or DBCS character
1101 // and also trying to avoid breaking inside a pair of combining characters.
1102 // The segment length must always be long enough (more than 4 bytes)
1103 // so that there will be at least one whole character to make a segment.
1104 // For UTF-8, text must consist only of valid whole characters.
1105 // In preference order from best to worst:
1106 //   1) Break after space
1107 //   2) Break before punctuation
1108 //   3) Break after whole character
1109
1110 int Document::SafeSegment(const char *text, int length, int lengthSegment) const noexcept {
1111         if (length <= lengthSegment)
1112                 return length;
1113         int lastSpaceBreak = -1;
1114         int lastPunctuationBreak = -1;
1115         int lastEncodingAllowedBreak = 0;
1116         for (int j=0; j < lengthSegment;) {
1117                 const unsigned char ch = text[j];
1118                 if (j > 0) {
1119                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
1120                                 lastSpaceBreak = j;
1121                         }
1122                         if (ch < 'A') {
1123                                 lastPunctuationBreak = j;
1124                         }
1125                 }
1126                 lastEncodingAllowedBreak = j;
1127
1128                 if (dbcsCodePage == SC_CP_UTF8) {
1129                         j += UTF8BytesOfLead[ch];
1130                 } else if (dbcsCodePage) {
1131                         j += IsDBCSLeadByteNoExcept(ch) ? 2 : 1;
1132                 } else {
1133                         j++;
1134                 }
1135         }
1136         if (lastSpaceBreak >= 0) {
1137                 return lastSpaceBreak;
1138         } else if (lastPunctuationBreak >= 0) {
1139                 return lastPunctuationBreak;
1140         }
1141         return lastEncodingAllowedBreak;
1142 }
1143
1144 EncodingFamily Document::CodePageFamily() const noexcept {
1145         if (SC_CP_UTF8 == dbcsCodePage)
1146                 return efUnicode;
1147         else if (dbcsCodePage)
1148                 return efDBCS;
1149         else
1150                 return efEightBit;
1151 }
1152
1153 void Document::ModifiedAt(Sci::Position pos) noexcept {
1154         if (endStyled > pos)
1155                 endStyled = pos;
1156 }
1157
1158 void Document::CheckReadOnly() {
1159         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
1160                 enteredReadOnlyCount++;
1161                 NotifyModifyAttempt();
1162                 enteredReadOnlyCount--;
1163         }
1164 }
1165
1166 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1167 // SetStyleAt does not change the persistent state of a document
1168
1169 bool Document::DeleteChars(Sci::Position pos, Sci::Position len) {
1170         if (pos < 0)
1171                 return false;
1172         if (len <= 0)
1173                 return false;
1174         if ((pos + len) > Length())
1175                 return false;
1176         CheckReadOnly();
1177         if (enteredModification != 0) {
1178                 return false;
1179         } else {
1180                 enteredModification++;
1181                 if (!cb.IsReadOnly()) {
1182                         NotifyModified(
1183                             DocModification(
1184                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
1185                                 pos, len,
1186                                 0, 0));
1187                         const Sci::Line prevLinesTotal = LinesTotal();
1188                         const bool startSavePoint = cb.IsSavePoint();
1189                         bool startSequence = false;
1190                         const char *text = cb.DeleteChars(pos, len, startSequence);
1191                         if (startSavePoint && cb.IsCollectingUndo())
1192                                 NotifySavePoint(!startSavePoint);
1193                         if ((pos < Length()) || (pos == 0))
1194                                 ModifiedAt(pos);
1195                         else
1196                                 ModifiedAt(pos-1);
1197                         NotifyModified(
1198                             DocModification(
1199                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1200                                 pos, len,
1201                                 LinesTotal() - prevLinesTotal, text));
1202                 }
1203                 enteredModification--;
1204         }
1205         return !cb.IsReadOnly();
1206 }
1207
1208 /**
1209  * Insert a string with a length.
1210  */
1211 Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
1212         if (insertLength <= 0) {
1213                 return 0;
1214         }
1215         CheckReadOnly();        // Application may change read only state here
1216         if (cb.IsReadOnly()) {
1217                 return 0;
1218         }
1219         if (enteredModification != 0) {
1220                 return 0;
1221         }
1222         enteredModification++;
1223         insertionSet = false;
1224         insertion.clear();
1225         NotifyModified(
1226                 DocModification(
1227                         SC_MOD_INSERTCHECK,
1228                         position, insertLength,
1229                         0, s));
1230         if (insertionSet) {
1231                 s = insertion.c_str();
1232                 insertLength = insertion.length();
1233         }
1234         NotifyModified(
1235                 DocModification(
1236                         SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1237                         position, insertLength,
1238                         0, s));
1239         const Sci::Line prevLinesTotal = LinesTotal();
1240         const bool startSavePoint = cb.IsSavePoint();
1241         bool startSequence = false;
1242         const char *text = cb.InsertString(position, s, insertLength, startSequence);
1243         if (startSavePoint && cb.IsCollectingUndo())
1244                 NotifySavePoint(!startSavePoint);
1245         ModifiedAt(position);
1246         NotifyModified(
1247                 DocModification(
1248                         SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1249                         position, insertLength,
1250                         LinesTotal() - prevLinesTotal, text));
1251         if (insertionSet) {     // Free memory as could be large
1252                 std::string().swap(insertion);
1253         }
1254         enteredModification--;
1255         return insertLength;
1256 }
1257
1258 void Document::ChangeInsertion(const char *s, Sci::Position length) {
1259         insertionSet = true;
1260         insertion.assign(s, length);
1261 }
1262
1263 int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1264         try {
1265                 const Sci::Position position = Length();
1266                 InsertString(position, data, length);
1267         } catch (std::bad_alloc &) {
1268                 return SC_STATUS_BADALLOC;
1269         } catch (...) {
1270                 return SC_STATUS_FAILURE;
1271         }
1272         return 0;
1273 }
1274
1275 void * SCI_METHOD Document::ConvertToDocument() {
1276         return this;
1277 }
1278
1279 Sci::Position Document::Undo() {
1280         Sci::Position newPos = -1;
1281         CheckReadOnly();
1282         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1283                 enteredModification++;
1284                 if (!cb.IsReadOnly()) {
1285                         const bool startSavePoint = cb.IsSavePoint();
1286                         bool multiLine = false;
1287                         const int steps = cb.StartUndo();
1288                         //Platform::DebugPrintf("Steps=%d\n", steps);
1289                         Sci::Position coalescedRemovePos = -1;
1290                         Sci::Position coalescedRemoveLen = 0;
1291                         Sci::Position prevRemoveActionPos = -1;
1292                         Sci::Position prevRemoveActionLen = 0;
1293                         for (int step = 0; step < steps; step++) {
1294                                 const Sci::Line prevLinesTotal = LinesTotal();
1295                                 const Action &action = cb.GetUndoStep();
1296                                 if (action.at == removeAction) {
1297                                         NotifyModified(DocModification(
1298                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
1299                                 } else if (action.at == containerAction) {
1300                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
1301                                         dm.token = action.position;
1302                                         NotifyModified(dm);
1303                                         if (!action.mayCoalesce) {
1304                                                 coalescedRemovePos = -1;
1305                                                 coalescedRemoveLen = 0;
1306                                                 prevRemoveActionPos = -1;
1307                                                 prevRemoveActionLen = 0;
1308                                         }
1309                                 } else {
1310                                         NotifyModified(DocModification(
1311                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
1312                                 }
1313                                 cb.PerformUndoStep();
1314                                 if (action.at != containerAction) {
1315                                         ModifiedAt(action.position);
1316                                         newPos = action.position;
1317                                 }
1318
1319                                 int modFlags = SC_PERFORMED_UNDO;
1320                                 // With undo, an insertion action becomes a deletion notification
1321                                 if (action.at == removeAction) {
1322                                         newPos += action.lenData;
1323                                         modFlags |= SC_MOD_INSERTTEXT;
1324                                         if ((coalescedRemoveLen > 0) &&
1325                                                 (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
1326                                                 coalescedRemoveLen += action.lenData;
1327                                                 newPos = coalescedRemovePos + coalescedRemoveLen;
1328                                         } else {
1329                                                 coalescedRemovePos = action.position;
1330                                                 coalescedRemoveLen = action.lenData;
1331                                         }
1332                                         prevRemoveActionPos = action.position;
1333                                         prevRemoveActionLen = action.lenData;
1334                                 } else if (action.at == insertAction) {
1335                                         modFlags |= SC_MOD_DELETETEXT;
1336                                         coalescedRemovePos = -1;
1337                                         coalescedRemoveLen = 0;
1338                                         prevRemoveActionPos = -1;
1339                                         prevRemoveActionLen = 0;
1340                                 }
1341                                 if (steps > 1)
1342                                         modFlags |= SC_MULTISTEPUNDOREDO;
1343                                 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1344                                 if (linesAdded != 0)
1345                                         multiLine = true;
1346                                 if (step == steps - 1) {
1347                                         modFlags |= SC_LASTSTEPINUNDOREDO;
1348                                         if (multiLine)
1349                                                 modFlags |= SC_MULTILINEUNDOREDO;
1350                                 }
1351                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1352                                                                                            linesAdded, action.data.get()));
1353                         }
1354
1355                         const bool endSavePoint = cb.IsSavePoint();
1356                         if (startSavePoint != endSavePoint)
1357                                 NotifySavePoint(endSavePoint);
1358                 }
1359                 enteredModification--;
1360         }
1361         return newPos;
1362 }
1363
1364 Sci::Position Document::Redo() {
1365         Sci::Position newPos = -1;
1366         CheckReadOnly();
1367         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1368                 enteredModification++;
1369                 if (!cb.IsReadOnly()) {
1370                         const bool startSavePoint = cb.IsSavePoint();
1371                         bool multiLine = false;
1372                         const int steps = cb.StartRedo();
1373                         for (int step = 0; step < steps; step++) {
1374                                 const Sci::Line prevLinesTotal = LinesTotal();
1375                                 const Action &action = cb.GetRedoStep();
1376                                 if (action.at == insertAction) {
1377                                         NotifyModified(DocModification(
1378                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
1379                                 } else if (action.at == containerAction) {
1380                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
1381                                         dm.token = action.position;
1382                                         NotifyModified(dm);
1383                                 } else {
1384                                         NotifyModified(DocModification(
1385                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
1386                                 }
1387                                 cb.PerformRedoStep();
1388                                 if (action.at != containerAction) {
1389                                         ModifiedAt(action.position);
1390                                         newPos = action.position;
1391                                 }
1392
1393                                 int modFlags = SC_PERFORMED_REDO;
1394                                 if (action.at == insertAction) {
1395                                         newPos += action.lenData;
1396                                         modFlags |= SC_MOD_INSERTTEXT;
1397                                 } else if (action.at == removeAction) {
1398                                         modFlags |= SC_MOD_DELETETEXT;
1399                                 }
1400                                 if (steps > 1)
1401                                         modFlags |= SC_MULTISTEPUNDOREDO;
1402                                 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1403                                 if (linesAdded != 0)
1404                                         multiLine = true;
1405                                 if (step == steps - 1) {
1406                                         modFlags |= SC_LASTSTEPINUNDOREDO;
1407                                         if (multiLine)
1408                                                 modFlags |= SC_MULTILINEUNDOREDO;
1409                                 }
1410                                 NotifyModified(
1411                                         DocModification(modFlags, action.position, action.lenData,
1412                                                                         linesAdded, action.data.get()));
1413                         }
1414
1415                         const bool endSavePoint = cb.IsSavePoint();
1416                         if (startSavePoint != endSavePoint)
1417                                 NotifySavePoint(endSavePoint);
1418                 }
1419                 enteredModification--;
1420         }
1421         return newPos;
1422 }
1423
1424 void Document::DelChar(Sci::Position pos) {
1425         DeleteChars(pos, LenChar(pos));
1426 }
1427
1428 void Document::DelCharBack(Sci::Position pos) {
1429         if (pos <= 0) {
1430                 return;
1431         } else if (IsCrLf(pos - 2)) {
1432                 DeleteChars(pos - 2, 2);
1433         } else if (dbcsCodePage) {
1434                 const Sci::Position startChar = NextPosition(pos, -1);
1435                 DeleteChars(startChar, pos - startChar);
1436         } else {
1437                 DeleteChars(pos - 1, 1);
1438         }
1439 }
1440
1441 static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1442         return ((pos / tabSize) + 1) * tabSize;
1443 }
1444
1445 static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1446         std::string indentation;
1447         if (!insertSpaces) {
1448                 while (indent >= tabSize) {
1449                         indentation += '\t';
1450                         indent -= tabSize;
1451                 }
1452         }
1453         while (indent > 0) {
1454                 indentation += ' ';
1455                 indent--;
1456         }
1457         return indentation;
1458 }
1459
1460 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1461         int indent = 0;
1462         if ((line >= 0) && (line < LinesTotal())) {
1463                 const Sci::Position lineStart = LineStart(line);
1464                 const Sci::Position length = Length();
1465                 for (Sci::Position i = lineStart; i < length; i++) {
1466                         const char ch = cb.CharAt(i);
1467                         if (ch == ' ')
1468                                 indent++;
1469                         else if (ch == '\t')
1470                                 indent = static_cast<int>(NextTab(indent, tabInChars));
1471                         else
1472                                 return indent;
1473                 }
1474         }
1475         return indent;
1476 }
1477
1478 Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) {
1479         const int indentOfLine = GetLineIndentation(line);
1480         if (indent < 0)
1481                 indent = 0;
1482         if (indent != indentOfLine) {
1483                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1484                 const Sci::Position thisLineStart = LineStart(line);
1485                 const Sci::Position indentPos = GetLineIndentPosition(line);
1486                 UndoGroup ug(this);
1487                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1488                 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1489                         linebuf.length());
1490         } else {
1491                 return GetLineIndentPosition(line);
1492         }
1493 }
1494
1495 Sci::Position Document::GetLineIndentPosition(Sci::Line line) const {
1496         if (line < 0)
1497                 return 0;
1498         Sci::Position pos = LineStart(line);
1499         const Sci::Position length = Length();
1500         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1501                 pos++;
1502         }
1503         return pos;
1504 }
1505
1506 Sci::Position Document::GetColumn(Sci::Position pos) {
1507         Sci::Position column = 0;
1508         const Sci::Line line = SciLineFromPosition(pos);
1509         if ((line >= 0) && (line < LinesTotal())) {
1510                 for (Sci::Position i = LineStart(line); i < pos;) {
1511                         const char ch = cb.CharAt(i);
1512                         if (ch == '\t') {
1513                                 column = NextTab(column, tabInChars);
1514                                 i++;
1515                         } else if (ch == '\r') {
1516                                 return column;
1517                         } else if (ch == '\n') {
1518                                 return column;
1519                         } else if (i >= Length()) {
1520                                 return column;
1521                         } else {
1522                                 column++;
1523                                 i = NextPosition(i, 1);
1524                         }
1525                 }
1526         }
1527         return column;
1528 }
1529
1530 Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const {
1531         startPos = MovePositionOutsideChar(startPos, 1, false);
1532         endPos = MovePositionOutsideChar(endPos, -1, false);
1533         Sci::Position count = 0;
1534         Sci::Position i = startPos;
1535         while (i < endPos) {
1536                 count++;
1537                 i = NextPosition(i, 1);
1538         }
1539         return count;
1540 }
1541
1542 Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const {
1543         startPos = MovePositionOutsideChar(startPos, 1, false);
1544         endPos = MovePositionOutsideChar(endPos, -1, false);
1545         Sci::Position count = 0;
1546         Sci::Position i = startPos;
1547         while (i < endPos) {
1548                 count++;
1549                 const Sci::Position next = NextPosition(i, 1);
1550                 if ((next - i) > 3)
1551                         count++;
1552                 i = next;
1553         }
1554         return count;
1555 }
1556
1557 Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) {
1558         Sci::Position position = LineStart(line);
1559         if ((line >= 0) && (line < LinesTotal())) {
1560                 Sci::Position columnCurrent = 0;
1561                 while ((columnCurrent < column) && (position < Length())) {
1562                         const char ch = cb.CharAt(position);
1563                         if (ch == '\t') {
1564                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1565                                 if (columnCurrent > column)
1566                                         return position;
1567                                 position++;
1568                         } else if (ch == '\r') {
1569                                 return position;
1570                         } else if (ch == '\n') {
1571                                 return position;
1572                         } else {
1573                                 columnCurrent++;
1574                                 position = NextPosition(position, 1);
1575                         }
1576                 }
1577         }
1578         return position;
1579 }
1580
1581 void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1582         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1583         for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1584                 const Sci::Position indentOfLine = GetLineIndentation(line);
1585                 if (forwards) {
1586                         if (LineStart(line) < LineEnd(line)) {
1587                                 SetLineIndentation(line, indentOfLine + IndentSize());
1588                         }
1589                 } else {
1590                         SetLineIndentation(line, indentOfLine - IndentSize());
1591                 }
1592         }
1593 }
1594
1595 // Convert line endings for a piece of text to a particular mode.
1596 // Stop at len or when a NUL is found.
1597 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1598         std::string dest;
1599         for (size_t i = 0; (i < len) && (s[i]); i++) {
1600                 if (s[i] == '\n' || s[i] == '\r') {
1601                         if (eolModeWanted == SC_EOL_CR) {
1602                                 dest.push_back('\r');
1603                         } else if (eolModeWanted == SC_EOL_LF) {
1604                                 dest.push_back('\n');
1605                         } else { // eolModeWanted == SC_EOL_CRLF
1606                                 dest.push_back('\r');
1607                                 dest.push_back('\n');
1608                         }
1609                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1610                                 i++;
1611                         }
1612                 } else {
1613                         dest.push_back(s[i]);
1614                 }
1615         }
1616         return dest;
1617 }
1618
1619 void Document::ConvertLineEnds(int eolModeSet) {
1620         UndoGroup ug(this);
1621
1622         for (Sci::Position pos = 0; pos < Length(); pos++) {
1623                 if (cb.CharAt(pos) == '\r') {
1624                         if (cb.CharAt(pos + 1) == '\n') {
1625                                 // CRLF
1626                                 if (eolModeSet == SC_EOL_CR) {
1627                                         DeleteChars(pos + 1, 1); // Delete the LF
1628                                 } else if (eolModeSet == SC_EOL_LF) {
1629                                         DeleteChars(pos, 1); // Delete the CR
1630                                 } else {
1631                                         pos++;
1632                                 }
1633                         } else {
1634                                 // CR
1635                                 if (eolModeSet == SC_EOL_CRLF) {
1636                                         pos += InsertString(pos + 1, "\n", 1); // Insert LF
1637                                 } else if (eolModeSet == SC_EOL_LF) {
1638                                         pos += InsertString(pos, "\n", 1); // Insert LF
1639                                         DeleteChars(pos, 1); // Delete CR
1640                                         pos--;
1641                                 }
1642                         }
1643                 } else if (cb.CharAt(pos) == '\n') {
1644                         // LF
1645                         if (eolModeSet == SC_EOL_CRLF) {
1646                                 pos += InsertString(pos, "\r", 1); // Insert CR
1647                         } else if (eolModeSet == SC_EOL_CR) {
1648                                 pos += InsertString(pos, "\r", 1); // Insert CR
1649                                 DeleteChars(pos, 1); // Delete LF
1650                                 pos--;
1651                         }
1652                 }
1653         }
1654
1655 }
1656
1657 int Document::Options() const {
1658         return (IsLarge() ? SC_DOCUMENTOPTION_TEXT_LARGE : 0) |
1659                 (cb.HasStyles() ? 0 : SC_DOCUMENTOPTION_STYLES_NONE);
1660 }
1661
1662 bool Document::IsWhiteLine(Sci::Line line) const {
1663         Sci::Position currentChar = LineStart(line);
1664         const Sci::Position endLine = LineEnd(line);
1665         while (currentChar < endLine) {
1666                 if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1667                         return false;
1668                 }
1669                 ++currentChar;
1670         }
1671         return true;
1672 }
1673
1674 Sci::Position Document::ParaUp(Sci::Position pos) const {
1675         Sci::Line line = SciLineFromPosition(pos);
1676         line--;
1677         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1678                 line--;
1679         }
1680         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1681                 line--;
1682         }
1683         line++;
1684         return LineStart(line);
1685 }
1686
1687 Sci::Position Document::ParaDown(Sci::Position pos) const {
1688         Sci::Line line = SciLineFromPosition(pos);
1689         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1690                 line++;
1691         }
1692         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1693                 line++;
1694         }
1695         if (line < LinesTotal())
1696                 return LineStart(line);
1697         else // end of a document
1698                 return LineEnd(line-1);
1699 }
1700
1701 bool Document::IsASCIIWordByte(unsigned char ch) const {
1702         if (IsASCII(ch)) {
1703                 return charClass.GetClass(ch) == CharClassify::ccWord;
1704         } else {
1705                 return false;
1706         }
1707 }
1708
1709 CharClassify::cc Document::WordCharacterClass(unsigned int ch) const {
1710         if (dbcsCodePage && (!UTF8IsAscii(ch))) {
1711                 if (SC_CP_UTF8 == dbcsCodePage) {
1712                         // Use hard coded Unicode class
1713                         const CharacterCategory cc = charMap.CategoryFor(ch);
1714                         switch (cc) {
1715
1716                                 // Separator, Line/Paragraph
1717                         case ccZl:
1718                         case ccZp:
1719                                 return CharClassify::ccNewLine;
1720
1721                                 // Separator, Space
1722                         case ccZs:
1723                                 // Other
1724                         case ccCc:
1725                         case ccCf:
1726                         case ccCs:
1727                         case ccCo:
1728                         case ccCn:
1729                                 return CharClassify::ccSpace;
1730
1731                                 // Letter
1732                         case ccLu:
1733                         case ccLl:
1734                         case ccLt:
1735                         case ccLm:
1736                         case ccLo:
1737                                 // Number
1738                         case ccNd:
1739                         case ccNl:
1740                         case ccNo:
1741                                 // Mark - includes combining diacritics
1742                         case ccMn:
1743                         case ccMc:
1744                         case ccMe:
1745                                 return CharClassify::ccWord;
1746
1747                                 // Punctuation
1748                         case ccPc:
1749                         case ccPd:
1750                         case ccPs:
1751                         case ccPe:
1752                         case ccPi:
1753                         case ccPf:
1754                         case ccPo:
1755                                 // Symbol
1756                         case ccSm:
1757                         case ccSc:
1758                         case ccSk:
1759                         case ccSo:
1760                                 return CharClassify::ccPunctuation;
1761
1762                         }
1763                 } else {
1764                         // Asian DBCS
1765                         return CharClassify::ccWord;
1766                 }
1767         }
1768         return charClass.GetClass(static_cast<unsigned char>(ch));
1769 }
1770
1771 /**
1772  * Used by commmands that want to select whole words.
1773  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1774  */
1775 Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1776         CharClassify::cc ccStart = CharClassify::ccWord;
1777         if (delta < 0) {
1778                 if (!onlyWordCharacters) {
1779                         const CharacterExtracted ce = CharacterBefore(pos);
1780                         ccStart = WordCharacterClass(ce.character);
1781                 }
1782                 while (pos > 0) {
1783                         const CharacterExtracted ce = CharacterBefore(pos);
1784                         if (WordCharacterClass(ce.character) != ccStart)
1785                                 break;
1786                         pos -= ce.widthBytes;
1787                 }
1788         } else {
1789                 if (!onlyWordCharacters && pos < Length()) {
1790                         const CharacterExtracted ce = CharacterAfter(pos);
1791                         ccStart = WordCharacterClass(ce.character);
1792                 }
1793                 while (pos < Length()) {
1794                         const CharacterExtracted ce = CharacterAfter(pos);
1795                         if (WordCharacterClass(ce.character) != ccStart)
1796                                 break;
1797                         pos += ce.widthBytes;
1798                 }
1799         }
1800         return MovePositionOutsideChar(pos, delta, true);
1801 }
1802
1803 /**
1804  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1805  * (delta < 0).
1806  * This is looking for a transition between character classes although there is also some
1807  * additional movement to transit white space.
1808  * Used by cursor movement by word commands.
1809  */
1810 Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
1811         if (delta < 0) {
1812                 while (pos > 0) {
1813                         const CharacterExtracted ce = CharacterBefore(pos);
1814                         if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1815                                 break;
1816                         pos -= ce.widthBytes;
1817                 }
1818                 if (pos > 0) {
1819                         CharacterExtracted ce = CharacterBefore(pos);
1820                         const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1821                         while (pos > 0) {
1822                                 ce = CharacterBefore(pos);
1823                                 if (WordCharacterClass(ce.character) != ccStart)
1824                                         break;
1825                                 pos -= ce.widthBytes;
1826                         }
1827                 }
1828         } else {
1829                 CharacterExtracted ce = CharacterAfter(pos);
1830                 const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1831                 while (pos < Length()) {
1832                         ce = CharacterAfter(pos);
1833                         if (WordCharacterClass(ce.character) != ccStart)
1834                                 break;
1835                         pos += ce.widthBytes;
1836                 }
1837                 while (pos < Length()) {
1838                         ce = CharacterAfter(pos);
1839                         if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1840                                 break;
1841                         pos += ce.widthBytes;
1842                 }
1843         }
1844         return pos;
1845 }
1846
1847 /**
1848  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1849  * (delta < 0).
1850  * This is looking for a transition between character classes although there is also some
1851  * additional movement to transit white space.
1852  * Used by cursor movement by word commands.
1853  */
1854 Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const {
1855         if (delta < 0) {
1856                 if (pos > 0) {
1857                         CharacterExtracted ce = CharacterBefore(pos);
1858                         const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1859                         if (ccStart != CharClassify::ccSpace) {
1860                                 while (pos > 0) {
1861                                         ce = CharacterBefore(pos);
1862                                         if (WordCharacterClass(ce.character) != ccStart)
1863                                                 break;
1864                                         pos -= ce.widthBytes;
1865                                 }
1866                         }
1867                         while (pos > 0) {
1868                                 ce = CharacterBefore(pos);
1869                                 if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1870                                         break;
1871                                 pos -= ce.widthBytes;
1872                         }
1873                 }
1874         } else {
1875                 while (pos < Length()) {
1876                         const CharacterExtracted ce = CharacterAfter(pos);
1877                         if (WordCharacterClass(ce.character) != CharClassify::ccSpace)
1878                                 break;
1879                         pos += ce.widthBytes;
1880                 }
1881                 if (pos < Length()) {
1882                         CharacterExtracted ce = CharacterAfter(pos);
1883                         const CharClassify::cc ccStart = WordCharacterClass(ce.character);
1884                         while (pos < Length()) {
1885                                 ce = CharacterAfter(pos);
1886                                 if (WordCharacterClass(ce.character) != ccStart)
1887                                         break;
1888                                 pos += ce.widthBytes;
1889                         }
1890                 }
1891         }
1892         return pos;
1893 }
1894
1895 /**
1896  * Check that the character at the given position is a word or punctuation character and that
1897  * the previous character is of a different character class.
1898  */
1899 bool Document::IsWordStartAt(Sci::Position pos) const {
1900         if (pos >= Length())
1901                 return false;
1902         if (pos > 0) {
1903                 const CharacterExtracted cePos = CharacterAfter(pos);
1904                 const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1905                 const CharacterExtracted cePrev = CharacterBefore(pos);
1906                 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1907                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1908                         (ccPos != ccPrev);
1909         }
1910         return true;
1911 }
1912
1913 /**
1914  * Check that the character at the given position is a word or punctuation character and that
1915  * the next character is of a different character class.
1916  */
1917 bool Document::IsWordEndAt(Sci::Position pos) const {
1918         if (pos <= 0)
1919                 return false;
1920         if (pos < Length()) {
1921                 const CharacterExtracted cePos = CharacterAfter(pos);
1922                 const CharClassify::cc ccPos = WordCharacterClass(cePos.character);
1923                 const CharacterExtracted cePrev = CharacterBefore(pos);
1924                 const CharClassify::cc ccPrev = WordCharacterClass(cePrev.character);
1925                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1926                         (ccPrev != ccPos);
1927         }
1928         return true;
1929 }
1930
1931 /**
1932  * Check that the given range is has transitions between character classes at both
1933  * ends and where the characters on the inside are word or punctuation characters.
1934  */
1935 bool Document::IsWordAt(Sci::Position start, Sci::Position end) const {
1936         return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1937 }
1938
1939 bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
1940         return (!word && !wordStart) ||
1941                         (word && IsWordAt(pos, pos + length)) ||
1942                         (wordStart && IsWordStartAt(pos));
1943 }
1944
1945 bool Document::HasCaseFolder() const noexcept {
1946         return pcf != nullptr;
1947 }
1948
1949 void Document::SetCaseFolder(CaseFolder *pcf_) {
1950         pcf.reset(pcf_);
1951 }
1952
1953 Document::CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
1954         const unsigned char leadByte = cb.UCharAt(position);
1955         if (UTF8IsAscii(leadByte)) {
1956                 // Common case: ASCII character
1957                 return CharacterExtracted(leadByte, 1);
1958         }
1959         const int widthCharBytes = UTF8BytesOfLead[leadByte];
1960         unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1961         for (int b=1; b<widthCharBytes; b++)
1962                 charBytes[b] = cb.UCharAt(position + b);
1963         const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1964         if (utf8status & UTF8MaskInvalid) {
1965                 // Treat as invalid and use up just one byte
1966                 return CharacterExtracted(unicodeReplacementChar, 1);
1967         } else {
1968                 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1969         }
1970 }
1971
1972 /**
1973  * Find text in document, supporting both forward and backward
1974  * searches (just pass minPos > maxPos to do a backward search)
1975  * Has not been tested with backwards DBCS searches yet.
1976  */
1977 Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
1978                         int flags, Sci::Position *length) {
1979         if (*length <= 0)
1980                 return minPos;
1981         const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
1982         const bool word = (flags & SCFIND_WHOLEWORD) != 0;
1983         const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
1984         const bool regExp = (flags & SCFIND_REGEXP) != 0;
1985         if (regExp) {
1986                 if (!regex)
1987                         regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
1988                 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1989         } else {
1990
1991                 const bool forward = minPos <= maxPos;
1992                 const int increment = forward ? 1 : -1;
1993
1994                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1995                 const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
1996                 const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);
1997
1998                 // Compute actual search ranges needed
1999                 const Sci::Position lengthFind = *length;
2000
2001                 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
2002                 const Sci::Position limitPos = std::max(startPos, endPos);
2003                 Sci::Position pos = startPos;
2004                 if (!forward) {
2005                         // Back all of a character
2006                         pos = NextPosition(pos, increment);
2007                 }
2008                 if (caseSensitive) {
2009                         const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2010                         const char charStartSearch =  search[0];
2011                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2012                                 if (CharAt(pos) == charStartSearch) {
2013                                         bool found = (pos + lengthFind) <= limitPos;
2014                                         for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
2015                                                 found = CharAt(pos + indexSearch) == search[indexSearch];
2016                                         }
2017                                         if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2018                                                 return pos;
2019                                         }
2020                                 }
2021                                 if (!NextCharacter(pos, increment))
2022                                         break;
2023                         }
2024                 } else if (SC_CP_UTF8 == dbcsCodePage) {
2025                         const size_t maxFoldingExpansion = 4;
2026                         std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
2027                         const size_t lenSearch =
2028                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2029                         char bytes[UTF8MaxBytes + 1] = "";
2030                         char folded[UTF8MaxBytes * maxFoldingExpansion + 1] = "";
2031                         while (forward ? (pos < endPos) : (pos >= endPos)) {
2032                                 int widthFirstCharacter = 0;
2033                                 Sci::Position posIndexDocument = pos;
2034                                 size_t indexSearch = 0;
2035                                 bool characterMatches = true;
2036                                 for (;;) {
2037                                         const unsigned char leadByte = cb.UCharAt(posIndexDocument);
2038                                         bytes[0] = leadByte;
2039                                         int widthChar = 1;
2040                                         if (!UTF8IsAscii(leadByte)) {
2041                                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
2042                                                 for (int b=1; b<widthCharBytes; b++) {
2043                                                         bytes[b] = cb.CharAt(posIndexDocument+b);
2044                                                 }
2045                                                 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
2046                                         }
2047                                         if (!widthFirstCharacter)
2048                                                 widthFirstCharacter = widthChar;
2049                                         if ((posIndexDocument + widthChar) > limitPos)
2050                                                 break;
2051                                         const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2052                                         // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2053                                         assert((indexSearch + lenFlat) <= searchThing.size());
2054                                         // Does folded match the buffer
2055                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2056                                         if (!characterMatches)
2057                                                 break;
2058                                         posIndexDocument += widthChar;
2059                                         indexSearch += lenFlat;
2060                                         if (indexSearch >= lenSearch)
2061                                                 break;
2062                                 }
2063                                 if (characterMatches && (indexSearch == lenSearch)) {
2064                                         if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
2065                                                 *length = posIndexDocument - pos;
2066                                                 return pos;
2067                                         }
2068                                 }
2069                                 if (forward) {
2070                                         pos += widthFirstCharacter;
2071                                 } else {
2072                                         if (!NextCharacter(pos, increment))
2073                                                 break;
2074                                 }
2075                         }
2076                 } else if (dbcsCodePage) {
2077                         const size_t maxBytesCharacter = 2;
2078                         const size_t maxFoldingExpansion = 4;
2079                         std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
2080                         const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2081                         while (forward ? (pos < endPos) : (pos >= endPos)) {
2082                                 Sci::Position indexDocument = 0;
2083                                 size_t indexSearch = 0;
2084                                 bool characterMatches = true;
2085                                 while (characterMatches &&
2086                                         ((pos + indexDocument) < limitPos) &&
2087                                         (indexSearch < lenSearch)) {
2088                                         char bytes[maxBytesCharacter + 1];
2089                                         bytes[0] = cb.CharAt(pos + indexDocument);
2090                                         const Sci::Position widthChar = IsDBCSLeadByteNoExcept(bytes[0]) ? 2 : 1;
2091                                         if (widthChar == 2)
2092                                                 bytes[1] = cb.CharAt(pos + indexDocument + 1);
2093                                         if ((pos + indexDocument + widthChar) > limitPos)
2094                                                 break;
2095                                         char folded[maxBytesCharacter * maxFoldingExpansion + 1];
2096                                         const size_t lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
2097                                         // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
2098                                         assert((indexSearch + lenFlat) <= searchThing.size());
2099                                         // Does folded match the buffer
2100                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
2101                                         indexDocument += widthChar;
2102                                         indexSearch += lenFlat;
2103                                 }
2104                                 if (characterMatches && (indexSearch == lenSearch)) {
2105                                         if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
2106                                                 *length = indexDocument;
2107                                                 return pos;
2108                                         }
2109                                 }
2110                                 if (!NextCharacter(pos, increment))
2111                                         break;
2112                         }
2113                 } else {
2114                         const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
2115                         std::vector<char> searchThing(lengthFind + 1);
2116                         pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
2117                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
2118                                 bool found = (pos + lengthFind) <= limitPos;
2119                                 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
2120                                         const char ch = CharAt(pos + indexSearch);
2121                                         char folded[2];
2122                                         pcf->Fold(folded, sizeof(folded), &ch, 1);
2123                                         found = folded[0] == searchThing[indexSearch];
2124                                 }
2125                                 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
2126                                         return pos;
2127                                 }
2128                                 if (!NextCharacter(pos, increment))
2129                                         break;
2130                         }
2131                 }
2132         }
2133         //Platform::DebugPrintf("Not found\n");
2134         return -1;
2135 }
2136
2137 const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) {
2138         if (regex)
2139                 return regex->SubstituteByPosition(this, text, length);
2140         else
2141                 return nullptr;
2142 }
2143
2144 int Document::LineCharacterIndex() const {
2145         return cb.LineCharacterIndex();
2146 }
2147
2148 void Document::AllocateLineCharacterIndex(int lineCharacterIndex) {
2149         return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2150 }
2151
2152 void Document::ReleaseLineCharacterIndex(int lineCharacterIndex) {
2153         return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2154 }
2155
2156 Sci::Line Document::LinesTotal() const noexcept {
2157         return cb.Lines();
2158 }
2159
/** Forward to the document's CharClassify table, passing includeWordClass through unchanged. */
void Document::SetDefaultCharClasses(bool includeWordClass) {
    charClass.SetDefaultCharClasses(includeWordClass);
}
2163
/** Forward chars and newCharClass to the document's CharClassify table. */
void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
    charClass.SetCharClasses(chars, newCharClass);
}
2167
2168 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) const {
2169     return charClass.GetCharsOfClass(characterClass, buffer);
2170 }
2171
/** Forward countCharacters to charMap.Optimize. */
void Document::SetCharacterCategoryOptimization(int countCharacters) {
        charMap.Optimize(countCharacters);
}
2175
2176 int Document::CharacterCategoryOptimization() const noexcept {
2177         return charMap.Size();
2178 }
2179
/**
 * Set the position from which subsequent styling calls continue.
 * The second (unnamed) char parameter is ignored.
 */
void SCI_METHOD Document::StartStyling(Sci_Position position, char) {
        endStyled = position;
}
2183
2184 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
2185         if (enteredStyling != 0) {
2186                 return false;
2187         } else {
2188                 enteredStyling++;
2189                 const Sci::Position prevEndStyled = endStyled;
2190                 if (cb.SetStyleFor(endStyled, length, style)) {
2191                         const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
2192                                            prevEndStyled, length);
2193                         NotifyModified(mh);
2194                 }
2195                 endStyled += length;
2196                 enteredStyling--;
2197                 return true;
2198         }
2199 }
2200
2201 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2202         if (enteredStyling != 0) {
2203                 return false;
2204         } else {
2205                 enteredStyling++;
2206                 bool didChange = false;
2207                 Sci::Position startMod = 0;
2208                 Sci::Position endMod = 0;
2209                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
2210                         PLATFORM_ASSERT(endStyled < Length());
2211                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
2212                                 if (!didChange) {
2213                                         startMod = endStyled;
2214                                 }
2215                                 didChange = true;
2216                                 endMod = endStyled;
2217                         }
2218                 }
2219                 if (didChange) {
2220                         const DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
2221                                            startMod, endMod - startMod + 1);
2222                         NotifyModified(mh);
2223                 }
2224                 enteredStyling--;
2225                 return true;
2226         }
2227 }
2228
2229 void Document::EnsureStyledTo(Sci::Position pos) {
2230         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
2231                 IncrementStyleClock();
2232                 if (pli && !pli->UseContainerLexing()) {
2233                         const Sci::Line lineEndStyled = SciLineFromPosition(GetEndStyled());
2234                         const Sci::Position endStyledTo = LineStart(lineEndStyled);
2235                         pli->Colourise(endStyledTo, pos);
2236                 } else {
2237                         // Ask the watchers to style, and stop as soon as one responds.
2238                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2239                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2240                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
2241                         }
2242                 }
2243         }
2244 }
2245
2246 void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
2247         const Sci::Line lineFirst = SciLineFromPosition(GetEndStyled());
2248         ElapsedPeriod epStyling;
2249         EnsureStyledTo(pos);
2250         const Sci::Line lineLast = SciLineFromPosition(GetEndStyled());
2251         durationStyleOneLine.AddSample(lineLast - lineFirst, epStyling.Duration());
2252 }
2253
2254 void Document::LexerChanged() {
2255         // Tell the watchers the lexer has changed.
2256         for (const WatcherWithUserData &watcher : watchers) {
2257                 watcher.watcher->NotifyLexerChanged(this, watcher.userData);
2258         }
2259 }
2260
2261 LexInterface *Document::GetLexInterface() const {
2262         return pli.get();
2263 }
2264
/** Replace the held lexer interface with pLexInterface via pli.reset(). */
void Document::SetLexInterface(LexInterface *pLexInterface) {
        pli.reset(pLexInterface);
}
2268
2269 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
2270         const int statePrevious = States()->SetLineState(static_cast<Sci::Line>(line), state);
2271         if (state != statePrevious) {
2272                 const DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, nullptr,
2273                         static_cast<Sci::Line>(line));
2274                 NotifyModified(mh);
2275         }
2276         return statePrevious;
2277 }
2278
2279 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
2280         return States()->GetLineState(static_cast<Sci::Line>(line));
2281 }
2282
2283 Sci::Line Document::GetMaxLineState() const {
2284         return States()->GetMaxLineState();
2285 }
2286
2287 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
2288         const DocModification mh(SC_MOD_LEXERSTATE, start,
2289                 end-start, 0, 0, 0);
2290         NotifyModified(mh);
2291 }
2292
2293 StyledText Document::MarginStyledText(Sci::Line line) const {
2294         const LineAnnotation *pla = Margins();
2295         return StyledText(pla->Length(line), pla->Text(line),
2296                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2297 }
2298
2299 void Document::MarginSetText(Sci::Line line, const char *text) {
2300         Margins()->SetText(line, text);
2301         const DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line),
2302                 0, 0, 0, line);
2303         NotifyModified(mh);
2304 }
2305
2306 void Document::MarginSetStyle(Sci::Line line, int style) {
2307         Margins()->SetStyle(line, style);
2308         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line),
2309                 0, 0, 0, line));
2310 }
2311
2312 void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2313         Margins()->SetStyles(line, styles);
2314         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line),
2315                 0, 0, 0, line));
2316 }
2317
2318 void Document::MarginClearAll() {
2319         const Sci::Line maxEditorLine = LinesTotal();
2320         for (Sci::Line l=0; l<maxEditorLine; l++)
2321                 MarginSetText(l, nullptr);
2322         // Free remaining data
2323         Margins()->ClearAll();
2324 }
2325
2326 StyledText Document::AnnotationStyledText(Sci::Line line) const {
2327         const LineAnnotation *pla = Annotations();
2328         return StyledText(pla->Length(line), pla->Text(line),
2329                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2330 }
2331
2332 void Document::AnnotationSetText(Sci::Line line, const char *text) {
2333         if (line >= 0 && line < LinesTotal()) {
2334                 const Sci::Line linesBefore = AnnotationLines(line);
2335                 Annotations()->SetText(line, text);
2336                 const int linesAfter = AnnotationLines(line);
2337                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line),
2338                         0, 0, 0, line);
2339                 mh.annotationLinesAdded = linesAfter - linesBefore;
2340                 NotifyModified(mh);
2341         }
2342 }
2343
2344 void Document::AnnotationSetStyle(Sci::Line line, int style) {
2345         Annotations()->SetStyle(line, style);
2346         const DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line),
2347                 0, 0, 0, line);
2348         NotifyModified(mh);
2349 }
2350
2351 void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2352         if (line >= 0 && line < LinesTotal()) {
2353                 Annotations()->SetStyles(line, styles);
2354         }
2355 }
2356
2357 int Document::AnnotationLines(Sci::Line line) const {
2358         return Annotations()->Lines(line);
2359 }
2360
2361 void Document::AnnotationClearAll() {
2362         const Sci::Line maxEditorLine = LinesTotal();
2363         for (Sci::Line l=0; l<maxEditorLine; l++)
2364                 AnnotationSetText(l, nullptr);
2365         // Free remaining data
2366         Annotations()->ClearAll();
2367 }
2368
2369 void Document::IncrementStyleClock() noexcept {
2370         styleClock = (styleClock + 1) % 0x100000;
2371 }
2372
/** Forward the indicator number to decorations->SetCurrentIndicator. */
void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
        decorations->SetCurrentIndicator(indicator);
}
2376
2377 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2378         const FillResult<Sci::Position> fr = decorations->FillRange(
2379                 position, value, fillLength);
2380         if (fr.changed) {
2381                 const DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2382                                                         fr.position, fr.fillLength);
2383                 NotifyModified(mh);
2384         }
2385 }
2386
2387 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2388         const WatcherWithUserData wwud(watcher, userData);
2389         std::vector<WatcherWithUserData>::iterator it =
2390                 std::find(watchers.begin(), watchers.end(), wwud);
2391         if (it != watchers.end())
2392                 return false;
2393         watchers.push_back(wwud);
2394         return true;
2395 }
2396
2397 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2398         std::vector<WatcherWithUserData>::iterator it =
2399                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2400         if (it != watchers.end()) {
2401                 watchers.erase(it);
2402                 return true;
2403         }
2404         return false;
2405 }
2406
2407 void Document::NotifyModifyAttempt() {
2408         for (const WatcherWithUserData &watcher : watchers) {
2409                 watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2410         }
2411 }
2412
2413 void Document::NotifySavePoint(bool atSavePoint) {
2414         for (const WatcherWithUserData &watcher : watchers) {
2415                 watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2416         }
2417 }
2418
2419 void Document::NotifyModified(DocModification mh) {
2420         if (mh.modificationType & SC_MOD_INSERTTEXT) {
2421                 decorations->InsertSpace(mh.position, mh.length);
2422         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2423                 decorations->DeleteRange(mh.position, mh.length);
2424         }
2425         for (const WatcherWithUserData &watcher : watchers) {
2426                 watcher.watcher->NotifyModified(this, mh, watcher.userData);
2427         }
2428 }
2429
// Used for word part navigation.
// True for the ASCII punctuation characters, which form four contiguous
// runs in the ASCII table: '!'..'/', ':'..'@', '['..'`' and '{'..'~'.
static bool IsASCIIPunctuationCharacter(unsigned int ch) noexcept {
        if (ch >= '!' && ch <= '/')
                return true;
        if (ch >= ':' && ch <= '@')
                return true;
        if (ch >= '[' && ch <= '`')
                return true;
        return ch >= '{' && ch <= '~';
}
2470
2471 bool Document::IsWordPartSeparator(unsigned int ch) const {
2472         return (WordCharacterClass(ch) == CharClassify::ccWord) && IsASCIIPunctuationCharacter(ch);
2473 }
2474
/**
 * Compute the position reached by moving left from pos by one word part.
 * Characters are grouped into runs (lower case, upper case, digits, ASCII
 * punctuation, space, non-ASCII) and the result is the start of the run that
 * lies to the left of pos. Returns pos unchanged when pos is not greater than 0.
 */
Sci::Position Document::WordPartLeft(Sci::Position pos) const {
        if (pos > 0) {
                // Step onto the character immediately before the starting position.
                pos -= CharacterBefore(pos).widthBytes;
                CharacterExtracted ceStart = CharacterAfter(pos);
                if (IsWordPartSeparator(ceStart.character)) {
                        // Skip backwards over a run of word part separators.
                        while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) {
                                pos -= CharacterBefore(pos).widthBytes;
                        }
                }
                if (pos > 0) {
                        // ceStart decides which kind of run is being crossed; pos then
                        // moves one more character back before the run is scanned.
                        ceStart = CharacterAfter(pos);
                        pos -= CharacterBefore(pos).widthBytes;
                        // Each branch walks back while the character at pos belongs to the run,
                        // then steps forward again if it stopped on a character outside the run.
                        if (IsLowerCase(ceStart.character)) {
                                while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                // An upper case character directly before a lower case run is kept
                                // as part of the run, so only step forward for other characters.
                                if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (IsUpperCase(ceStart.character)) {
                                while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!IsUpperCase(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (IsADigit(ceStart.character)) {
                                while (pos > 0 && IsADigit(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!IsADigit(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (IsASCIIPunctuationCharacter(ceStart.character)) {
                                while (pos > 0 && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (isspacechar(ceStart.character)) {
                                while (pos > 0 && isspacechar(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!isspacechar(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (!IsASCII(ceStart.character)) {
                                while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (IsASCII(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else {
                                // No recognised run: undo the extra step back taken above.
                                pos += CharacterAfter(pos).widthBytes;
                        }
                }
        }
        return pos;
}
2524
/**
 * Compute the position reached by moving right from pos by one word part.
 * Leading word part separators are skipped, then pos advances past the run of
 * characters (non-ASCII, lower case, upper case, digits, ASCII punctuation or
 * space) that starts there, never scanning beyond the document length.
 */
Sci::Position Document::WordPartRight(Sci::Position pos) const {
        CharacterExtracted ceStart = CharacterAfter(pos);
        const Sci::Position length = Length();
        if (IsWordPartSeparator(ceStart.character)) {
                // Skip forwards over a run of word part separators and
                // re-read the character that now starts the word part.
                while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
                ceStart = CharacterAfter(pos);
        }
        if (!IsASCII(ceStart.character)) {
                while (pos < length && !IsASCII(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (IsLowerCase(ceStart.character)) {
                while (pos < length && IsLowerCase(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (IsUpperCase(ceStart.character)) {
                if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
                        // Upper case followed by lower case: take the capital and the
                        // lower case run after it as one part.
                        pos += CharacterAfter(pos).widthBytes;
                        while (pos < length && IsLowerCase(CharacterAfter(pos).character))
                                pos += CharacterAfter(pos).widthBytes;
                } else {
                        // Otherwise consume the whole run of upper case characters.
                        while (pos < length && IsUpperCase(CharacterAfter(pos).character))
                                pos += CharacterAfter(pos).widthBytes;
                }
                // When the position ends between an upper case and a lower case character,
                // step back so that the upper case character is left for the next part.
                if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
                        pos -= CharacterBefore(pos).widthBytes;
        } else if (IsADigit(ceStart.character)) {
                while (pos < length && IsADigit(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (IsASCIIPunctuationCharacter(ceStart.character)) {
                while (pos < length && IsASCIIPunctuationCharacter(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (isspacechar(ceStart.character)) {
                while (pos < length && isspacechar(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else {
                // No recognised run: move over a single character.
                pos += CharacterAfter(pos).widthBytes;
        }
        return pos;
}
2564
/** True when c is a carriage return or a line feed. */
static constexpr bool IsLineEndChar(char c) noexcept {
        return (c == '\r') ? true : (c == '\n');
}
2568
2569 Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) {
2570         const int sStart = cb.StyleAt(pos);
2571         if (delta < 0) {
2572                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2573                         pos--;
2574                 pos++;
2575         } else {
2576                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2577                         pos++;
2578         }
2579         return pos;
2580 }
2581
/**
 * Return the brace that pairs with ch, or '\0' when ch is not one of
 * ( ) [ ] { } < >.
 */
static char BraceOpposite(char ch) noexcept {
        // Partners are stored next to each other, so flipping the lowest bit
        // of an index yields the index of the opposite brace.
        static constexpr char bracePairs[] = { '(', ')', '[', ']', '{', '}', '<', '>' };
        constexpr int braceCount = static_cast<int>(sizeof(bracePairs) / sizeof(bracePairs[0]));
        for (int i = 0; i < braceCount; i++) {
                if (bracePairs[i] == ch)
                        return bracePairs[i ^ 1];
        }
        return '\0';
}
2604
2605 // TODO: should be able to extend styled region to find matching brace
2606 Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/) {
2607         const char chBrace = CharAt(position);
2608         const char chSeek = BraceOpposite(chBrace);
2609         if (chSeek == '\0')
2610                 return - 1;
2611         const int styBrace = StyleIndexAt(position);
2612         int direction = -1;
2613         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2614                 direction = 1;
2615         int depth = 1;
2616         position = NextPosition(position, direction);
2617         while ((position >= 0) && (position < Length())) {
2618                 const char chAtPos = CharAt(position);
2619                 const int styAtPos = StyleIndexAt(position);
2620                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2621                         if (chAtPos == chBrace)
2622                                 depth++;
2623                         if (chAtPos == chSeek)
2624                                 depth--;
2625                         if (depth == 0)
2626                                 return position;
2627                 }
2628                 const Sci::Position positionBeforeMove = position;
2629                 position = NextPosition(position, direction);
2630                 if (position == positionBeforeMove)
2631                         break;
2632         }
2633         return - 1;
2634 }
2635
2636 /**
2637  * Implementation of RegexSearchBase for the default built-in regular expression engine
2638  */
class BuiltinRegex : public RegexSearchBase {
public:
        // Constructs the RESearch member from the supplied character classification table.
        explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
        // Copying and moving are disabled.
        BuiltinRegex(const BuiltinRegex &) = delete;
        BuiltinRegex(BuiltinRegex &&) = delete;
        BuiltinRegex &operator=(const BuiltinRegex &) = delete;
        BuiltinRegex &operator=(BuiltinRegex &&) = delete;
        ~BuiltinRegex() override = default;

        // Override of the RegexSearchBase search entry point; declared here, not defined inline.
        Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
                        bool caseSensitive, bool word, bool wordStart, int flags,
                        Sci::Position *length) override;

        // Override of the RegexSearchBase substitution entry point; declared here, not defined inline.
        const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;

private:
        // Regular expression engine, built with the character class table given to the constructor.
        RESearch search;
        // NOTE(review): presumably holds the text produced by SubstituteByPosition so the
        // returned pointer stays valid - confirm against the definition.
        std::string substituted;
};
2658
2659 namespace {
2660
2661 /**
2662 * RESearchRange keeps track of search range.
2663 */
2664 class RESearchRange {
2665 public:
2666         const Document *doc;
2667         int increment;
2668         Sci::Position startPos;
2669         Sci::Position endPos;
2670         Sci::Line lineRangeStart;
2671         Sci::Line lineRangeEnd;
2672         Sci::Line lineRangeBreak;
2673         RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) : doc(doc_) {
2674                 increment = (minPos <= maxPos) ? 1 : -1;
2675
2676                 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2677                 // but just in case, move them.
2678                 startPos = doc->MovePositionOutsideChar(minPos, 1, true);
2679                 endPos = doc->MovePositionOutsideChar(maxPos, 1, true);
2680
2681                 lineRangeStart = doc->SciLineFromPosition(startPos);
2682                 lineRangeEnd = doc->SciLineFromPosition(endPos);
2683                 lineRangeBreak = lineRangeEnd + increment;
2684         }
2685         Range LineRange(Sci::Line line) const {
2686                 Range range(doc->LineStart(line), doc->LineEnd(line));
2687                 if (increment == 1) {
2688                         if (line == lineRangeStart)
2689                                 range.start = startPos;
2690                         if (line == lineRangeEnd)
2691                                 range.end = endPos;
2692                 } else {
2693                         if (line == lineRangeEnd)
2694                                 range.start = endPos;
2695                         if (line == lineRangeStart)
2696                                 range.end = startPos;
2697                 }
2698                 return range;
2699         }
2700 };
2701
2702 // Define a way for the Regular Expression code to access the document
2703 class DocumentIndexer : public CharacterIndexer {
2704         Document *pdoc;
2705         Sci::Position end;
2706 public:
2707         DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept :
2708                 pdoc(pdoc_), end(end_) {
2709         }
2710
2711         DocumentIndexer(const DocumentIndexer &) = delete;
2712         DocumentIndexer(DocumentIndexer &&) = delete;
2713         DocumentIndexer &operator=(const DocumentIndexer &) = delete;
2714         DocumentIndexer &operator=(DocumentIndexer &&) = delete;
2715
2716         ~DocumentIndexer() override = default;
2717
2718         char CharAt(Sci::Position index) const noexcept override {
2719                 if (index < 0 || index >= end)
2720                         return 0;
2721                 else
2722                         return pdoc->CharAt(index);
2723         }
2724 };
2725
2726 #ifndef NO_CXX11_REGEX
2727
2728 class ByteIterator {
2729 public:
2730         typedef std::bidirectional_iterator_tag iterator_category;
2731         typedef char value_type;
2732         typedef ptrdiff_t difference_type;
2733         typedef char* pointer;
2734         typedef char& reference;
2735
2736         const Document *doc;
2737         Sci::Position position;
2738
2739         ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2740                 doc(doc_), position(position_) {
2741         }
2742         ByteIterator(const ByteIterator &other) noexcept {
2743                 doc = other.doc;
2744                 position = other.position;
2745         }
2746         ByteIterator(ByteIterator &&other) noexcept {
2747                 doc = other.doc;
2748                 position = other.position;
2749         }
2750         ByteIterator &operator=(const ByteIterator &other) noexcept {
2751                 if (this != &other) {
2752                         doc = other.doc;
2753                         position = other.position;
2754                 }
2755                 return *this;
2756         }
2757         ByteIterator &operator=(ByteIterator &&) noexcept = default;
2758         ~ByteIterator() = default;
2759         char operator*() const noexcept {
2760                 return doc->CharAt(position);
2761         }
2762         ByteIterator &operator++() noexcept {
2763                 position++;
2764                 return *this;
2765         }
2766         ByteIterator operator++(int) noexcept {
2767                 ByteIterator retVal(*this);
2768                 position++;
2769                 return retVal;
2770         }
2771         ByteIterator &operator--() noexcept {
2772                 position--;
2773                 return *this;
2774         }
2775         bool operator==(const ByteIterator &other) const noexcept {
2776                 return doc == other.doc && position == other.position;
2777         }
2778         bool operator!=(const ByteIterator &other) const noexcept {
2779                 return doc != other.doc || position != other.position;
2780         }
2781         Sci::Position Pos() const noexcept {
2782                 return position;
2783         }
2784         Sci::Position PosRoundUp() const noexcept {
2785                 return position;
2786         }
2787 };
2788
// wchar_t is 16 bits wide on Windows and 32 bits wide on Unix.
// Testing sizeof(wchar_t) or similar would be a better way to differentiate
// but, for now, it is easier to hard-code the platforms.
// C++11 has char16_t and char32_t but neither Clang nor Visual C++
// appears to allow specializing basic_regex over these.

#if defined(_WIN32)
#define WCHAR_T_IS_16 1
#else
#define WCHAR_T_IS_16 0
#endif
2800
2801 #if WCHAR_T_IS_16
2802
2803 // On Windows, report non-BMP characters as 2 separate surrogates as that
2804 // matches wregex since it is based on wchar_t.
2805 class UTF8Iterator {
2806         // These 3 fields determine the iterator position and are used for comparisons
2807         const Document *doc;
2808         Sci::Position position;
2809         size_t characterIndex;
2810         // Remaining fields are derived from the determining fields so are excluded in comparisons
2811         unsigned int lenBytes;
2812         size_t lenCharacters;
2813         wchar_t buffered[2];
2814 public:
2815         typedef std::bidirectional_iterator_tag iterator_category;
2816         typedef wchar_t value_type;
2817         typedef ptrdiff_t difference_type;
2818         typedef wchar_t* pointer;
2819         typedef wchar_t& reference;
2820
2821         UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2822                 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
2823                 buffered[0] = 0;
2824                 buffered[1] = 0;
2825                 if (doc) {
2826                         ReadCharacter();
2827                 }
2828         }
2829         UTF8Iterator(const UTF8Iterator &other) noexcept : buffered{} {
2830                 doc = other.doc;
2831                 position = other.position;
2832                 characterIndex = other.characterIndex;
2833                 lenBytes = other.lenBytes;
2834                 lenCharacters = other.lenCharacters;
2835                 buffered[0] = other.buffered[0];
2836                 buffered[1] = other.buffered[1];
2837         }
2838         UTF8Iterator(UTF8Iterator &&other) noexcept = default;
2839         UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
2840                 if (this != &other) {
2841                         doc = other.doc;
2842                         position = other.position;
2843                         characterIndex = other.characterIndex;
2844                         lenBytes = other.lenBytes;
2845                         lenCharacters = other.lenCharacters;
2846                         buffered[0] = other.buffered[0];
2847                         buffered[1] = other.buffered[1];
2848                 }
2849                 return *this;
2850         }
2851         UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
2852         ~UTF8Iterator() = default;
2853         wchar_t operator*() const noexcept {
2854                 assert(lenCharacters != 0);
2855                 return buffered[characterIndex];
2856         }
2857         UTF8Iterator &operator++() noexcept {
2858                 if ((characterIndex + 1) < (lenCharacters)) {
2859                         characterIndex++;
2860                 } else {
2861                         position += lenBytes;
2862                         ReadCharacter();
2863                         characterIndex = 0;
2864                 }
2865                 return *this;
2866         }
2867         UTF8Iterator operator++(int) noexcept {
2868                 UTF8Iterator retVal(*this);
2869                 if ((characterIndex + 1) < (lenCharacters)) {
2870                         characterIndex++;
2871                 } else {
2872                         position += lenBytes;
2873                         ReadCharacter();
2874                         characterIndex = 0;
2875                 }
2876                 return retVal;
2877         }
2878         UTF8Iterator &operator--() noexcept {
2879                 if (characterIndex) {
2880                         characterIndex--;
2881                 } else {
2882                         position = doc->NextPosition(position, -1);
2883                         ReadCharacter();
2884                         characterIndex = lenCharacters - 1;
2885                 }
2886                 return *this;
2887         }
2888         bool operator==(const UTF8Iterator &other) const noexcept {
2889                 // Only test the determining fields, not the character widths and values derived from this
2890                 return doc == other.doc &&
2891                         position == other.position &&
2892                         characterIndex == other.characterIndex;
2893         }
2894         bool operator!=(const UTF8Iterator &other) const noexcept {
2895                 // Only test the determining fields, not the character widths and values derived from this
2896                 return doc != other.doc ||
2897                         position != other.position ||
2898                         characterIndex != other.characterIndex;
2899         }
2900         Sci::Position Pos() const noexcept {
2901                 return position;
2902         }
2903         Sci::Position PosRoundUp() const noexcept {
2904                 if (characterIndex)
2905                         return position + lenBytes;     // Force to end of character
2906                 else
2907                         return position;
2908         }
2909 private:
2910         void ReadCharacter() noexcept {
2911                 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2912                 lenBytes = charExtracted.widthBytes;
2913                 if (charExtracted.character == unicodeReplacementChar) {
2914                         lenCharacters = 1;
2915                         buffered[0] = static_cast<wchar_t>(charExtracted.character);
2916                 } else {
2917                         lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2918                 }
2919         }
2920 };
2921
2922 #else
2923
2924 // On Unix, report non-BMP characters as single characters
2925
2926 class UTF8Iterator {
2927         const Document *doc;
2928         Sci::Position position;
2929 public:
2930         typedef std::bidirectional_iterator_tag iterator_category;
2931         typedef wchar_t value_type;
2932         typedef ptrdiff_t difference_type;
2933         typedef wchar_t* pointer;
2934         typedef wchar_t& reference;
2935
2936         UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2937                 doc(doc_), position(position_) {
2938         }
2939         UTF8Iterator(const UTF8Iterator &other) noexcept {
2940                 doc = other.doc;
2941                 position = other.position;
2942         }
2943         UTF8Iterator(UTF8Iterator &&other) noexcept = default;
2944         UTF8Iterator &operator=(const UTF8Iterator &other) noexcept {
2945                 if (this != &other) {
2946                         doc = other.doc;
2947                         position = other.position;
2948                 }
2949                 return *this;
2950         }
2951         UTF8Iterator &operator=(UTF8Iterator &&) noexcept = default;
2952         ~UTF8Iterator() = default;
2953         wchar_t operator*() const noexcept {
2954                 const Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2955                 return charExtracted.character;
2956         }
2957         UTF8Iterator &operator++() noexcept {
2958                 position = doc->NextPosition(position, 1);
2959                 return *this;
2960         }
2961         UTF8Iterator operator++(int) noexcept {
2962                 UTF8Iterator retVal(*this);
2963                 position = doc->NextPosition(position, 1);
2964                 return retVal;
2965         }
2966         UTF8Iterator &operator--() noexcept {
2967                 position = doc->NextPosition(position, -1);
2968                 return *this;
2969         }
2970         bool operator==(const UTF8Iterator &other) const noexcept {
2971                 return doc == other.doc && position == other.position;
2972         }
2973         bool operator!=(const UTF8Iterator &other) const noexcept {
2974                 return doc != other.doc || position != other.position;
2975         }
2976         Sci::Position Pos() const noexcept {
2977                 return position;
2978         }
2979         Sci::Position PosRoundUp() const noexcept {
2980                 return position;
2981         }
2982 };
2983
2984 #endif
2985
2986 std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos) {
2987         std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2988         if (!doc->IsLineStartPosition(startPos))
2989                 flagsMatch |= std::regex_constants::match_not_bol;
2990         if (!doc->IsLineEndPosition(endPos))
2991                 flagsMatch |= std::regex_constants::match_not_eol;
2992         return flagsMatch;
2993 }
2994
2995 template<typename Iterator, typename Regex>
2996 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2997         std::match_results<Iterator> match;
2998
2999         // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3000         // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3001         // The std::regex::multiline option was added to C++17 to improve behaviour but
3002         // has not been implemented by compiler runtimes with MSVC always in multiline
3003         // mode and libc++ and libstdc++ always in single-line mode.
3004         // If multiline regex worked well then the line by line iteration could be removed
3005         // for the forwards case and replaced with the following 4 lines:
3006 #ifdef REGEX_MULTILINE
3007         Iterator itStart(doc, resr.startPos);
3008         Iterator itEnd(doc, resr.endPos);
3009         const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos);
3010         const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3011 #else
3012         // Line by line.
3013         bool matched = false;
3014         for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3015                 const Range lineRange = resr.LineRange(line);
3016                 Iterator itStart(doc, lineRange.start);
3017                 Iterator itEnd(doc, lineRange.end);
3018                 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
3019                 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3020                 // Check for the last match on this line.
3021                 if (matched) {
3022                         if (resr.increment == -1) {
3023                                 while (matched) {
3024                                         Iterator itNext(doc, match[0].second.PosRoundUp());
3025                                         flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
3026                                         std::match_results<Iterator> matchNext;
3027                                         matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
3028                                         if (matched) {
3029                                                 if (match[0].first == match[0].second) {
3030                                                         // Empty match means failure so exit
3031                                                         return false;
3032                                                 }
3033                                                 match = matchNext;
3034                                         }
3035                                 }
3036                                 matched = true;
3037                         }
3038                         break;
3039                 }
3040         }
3041 #endif
3042         if (matched) {
3043                 for (size_t co = 0; co < match.size(); co++) {
3044                         search.bopat[co] = match[co].first.Pos();
3045                         search.eopat[co] = match[co].second.PosRoundUp();
3046                         const Sci::Position lenMatch = search.eopat[co] - search.bopat[co];
3047                         search.pat[co].resize(lenMatch);
3048                         for (Sci::Position iPos = 0; iPos < lenMatch; iPos++) {
3049                                 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
3050                         }
3051                 }
3052         }
3053         return matched;
3054 }
3055
3056 Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3057         bool caseSensitive, Sci::Position *length, RESearch &search) {
3058         const RESearchRange resr(doc, minPos, maxPos);
3059         try {
3060                 //ElapsedPeriod ep;
3061                 std::regex::flag_type flagsRe = std::regex::ECMAScript;
3062                 // Flags that apper to have no effect:
3063                 // | std::regex::collate | std::regex::extended;
3064                 if (!caseSensitive)
3065                         flagsRe = flagsRe | std::regex::icase;
3066
3067                 // Clear the RESearch so can fill in matches
3068                 search.Clear();
3069
3070                 bool matched = false;
3071                 if (SC_CP_UTF8 == doc->dbcsCodePage) {
3072                         const std::wstring ws = WStringFromUTF8(s, strlen(s));
3073                         std::wregex regexp;
3074                         regexp.assign(ws, flagsRe);
3075                         matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3076
3077                 } else {
3078                         std::regex regexp;
3079                         regexp.assign(s, flagsRe);
3080                         matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3081                 }
3082
3083                 Sci::Position posMatch = -1;
3084                 if (matched) {
3085                         posMatch = search.bopat[0];
3086                         *length = search.eopat[0] - search.bopat[0];
3087                 }
3088                 // Example - search in doc/ScintillaHistory.html for
3089                 // [[:upper:]]eta[[:space:]]
3090                 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3091                 //const double durSearch = ep.Duration(true);
3092                 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3093                 return posMatch;
3094         } catch (std::regex_error &) {
3095                 // Failed to create regular expression
3096                 throw RegexError();
3097         } catch (...) {
3098                 // Failed in some other way
3099                 return -1;
3100         }
3101 }
3102
3103 #endif
3104
3105 }
3106
3107 Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3108                         bool caseSensitive, bool, bool, int flags,
3109                         Sci::Position *length) {
3110
3111 #ifndef NO_CXX11_REGEX
3112         if (flags & SCFIND_CXX11REGEX) {
3113                         return Cxx11RegexFindText(doc, minPos, maxPos, s,
3114                         caseSensitive, length, search);
3115         }
3116 #endif
3117
3118         const RESearchRange resr(doc, minPos, maxPos);
3119
3120         const bool posix = (flags & SCFIND_POSIX) != 0;
3121
3122         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
3123         if (errmsg) {
3124                 return -1;
3125         }
3126         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3127         // Replace first '.' with '-' in each property file variable reference:
3128         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3129         //     Replace: $(\1-\2)
3130         Sci::Position pos = -1;
3131         Sci::Position lenRet = 0;
3132         const bool searchforLineStart = s[0] == '^';
3133         const char searchEnd = s[*length - 1];
3134         const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
3135         const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
3136         for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3137                 Sci::Position startOfLine = doc->LineStart(line);
3138                 Sci::Position endOfLine = doc->LineEnd(line);
3139                 if (resr.increment == 1) {
3140                         if (line == resr.lineRangeStart) {
3141                                 if ((resr.startPos != startOfLine) && searchforLineStart)
3142                                         continue;       // Can't match start of line if start position after start of line
3143                                 startOfLine = resr.startPos;
3144                         }
3145                         if (line == resr.lineRangeEnd) {
3146                                 if ((resr.endPos != endOfLine) && searchforLineEnd)
3147                                         continue;       // Can't match end of line if end position before end of line
3148                                 endOfLine = resr.endPos;
3149                         }
3150                 } else {
3151                         if (line == resr.lineRangeEnd) {
3152                                 if ((resr.endPos != startOfLine) && searchforLineStart)
3153                                         continue;       // Can't match start of line if end position after start of line
3154                                 startOfLine = resr.endPos;
3155                         }
3156                         if (line == resr.lineRangeStart) {
3157                                 if ((resr.startPos != endOfLine) && searchforLineEnd)
3158                                         continue;       // Can't match end of line if start position before end of line
3159                                 endOfLine = resr.startPos;
3160                         }
3161                 }
3162
3163                 const DocumentIndexer di(doc, endOfLine);
3164                 int success = search.Execute(di, startOfLine, endOfLine);
3165                 if (success) {
3166                         pos = search.bopat[0];
3167                         // Ensure only whole characters selected
3168                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
3169                         lenRet = search.eopat[0] - search.bopat[0];
3170                         // There can be only one start of a line, so no need to look for last match in line
3171                         if ((resr.increment == -1) && !searchforLineStart) {
3172                                 // Check for the last match on this line.
3173                                 int repetitions = 1000; // Break out of infinite loop
3174                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
3175                                         success = search.Execute(di, pos+1, endOfLine);
3176                                         if (success) {
3177                                                 if (search.eopat[0] <= minPos) {
3178                                                         pos = search.bopat[0];
3179                                                         lenRet = search.eopat[0] - search.bopat[0];
3180                                                 } else {
3181                                                         success = 0;
3182                                                 }
3183                                         }
3184                                 }
3185                         }
3186                         break;
3187                 }
3188         }
3189         *length = lenRet;
3190         return pos;
3191 }
3192
3193 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
3194         substituted.clear();
3195         const DocumentIndexer di(doc, doc->Length());
3196         search.GrabMatches(di);
3197         for (Sci::Position j = 0; j < *length; j++) {
3198                 if (text[j] == '\\') {
3199                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
3200                                 const unsigned int patNum = text[j + 1] - '0';
3201                                 const Sci::Position len = search.eopat[patNum] - search.bopat[patNum];
3202                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
3203                                         substituted.append(search.pat[patNum].c_str(), len);
3204                                 j++;
3205                         } else {
3206                                 j++;
3207                                 switch (text[j]) {
3208                                 case 'a':
3209                                         substituted.push_back('\a');
3210                                         break;
3211                                 case 'b':
3212                                         substituted.push_back('\b');
3213                                         break;
3214                                 case 'f':
3215                                         substituted.push_back('\f');
3216                                         break;
3217                                 case 'n':
3218                                         substituted.push_back('\n');
3219                                         break;
3220                                 case 'r':
3221                                         substituted.push_back('\r');
3222                                         break;
3223                                 case 't':
3224                                         substituted.push_back('\t');
3225                                         break;
3226                                 case 'v':
3227                                         substituted.push_back('\v');
3228                                         break;
3229                                 case '\\':
3230                                         substituted.push_back('\\');
3231                                         break;
3232                                 default:
3233                                         substituted.push_back('\\');
3234                                         j--;
3235                                 }
3236                         }
3237                 } else {
3238                         substituted.push_back(text[j]);
3239                 }
3240         }
3241         *length = substituted.length();
3242         return substituted.c_str();
3243 }
3244
3245 #ifndef SCI_OWNREGEX
3246
3247 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
3248         return new BuiltinRegex(charClassTable);
3249 }
3250
3251 #endif