ext/scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <cstddef>
   9 #include <cstdlib>
  10 #include <cassert>
  11 #include <cstring>
  12 #include <cstdio>
  13 #include <cmath>
  14
  15 #include <stdexcept>
  16 #include <string>
  17 #include <string_view>
  18 #include <vector>
  19 #include <array>
  20 #include <forward_list>
  21 #include <optional>
  22 #include <algorithm>
  23 #include <memory>
  24 #include <chrono>
  25
  26 #ifndef NO_CXX11_REGEX
  27 #include <regex>
  28 #endif
  29
  30 #include "ScintillaTypes.h"
  31 #include "ILoader.h"
  32 #include "ILexer.h"
  33
  34 #include "Debugging.h"
  35
  36 #include "CharacterType.h"
  37 #include "CharacterCategoryMap.h"
  38 #include "Position.h"
  39 #include "SplitVector.h"
  40 #include "Partitioning.h"
  41 #include "RunStyles.h"
  42 #include "CellBuffer.h"
  43 #include "PerLine.h"
  44 #include "CharClassify.h"
  45 #include "Decoration.h"
  46 #include "CaseFolder.h"
  47 #include "Document.h"
  48 #include "RESearch.h"
  49 #include "UniConversion.h"
  50 #include "ElapsedPeriod.h"
  51
  52 using namespace Scintilla;
  53 using namespace Scintilla::Internal;
  54
  55 #if defined(__GNUC__) && !defined(__clang__)
  56 // False warnings from g++ 14.1 for UTF-8 accumulation code where UTF8MaxBytes allocated.
  57 #pragma GCC diagnostic ignored "-Wstringop-overflow"
  58 #endif
  59
  60 LexInterface::LexInterface(Document *pdoc_) noexcept : pdoc(pdoc_), performingStyle(false) {
  61 }
  62
// Compiler-generated destruction; defined here rather than in the header.
LexInterface::~LexInterface() noexcept = default;
  64
// Replace the held lexer with instance_ by resetting the instance member.
// Other methods in this file test instance for null before using it.
void LexInterface::SetInstance(ILexer5 *instance_) noexcept {
        instance.reset(instance_);
}
  68
  69 void LexInterface::Colourise(Sci::Position start, Sci::Position end) {
  70         if (pdoc && instance && !performingStyle) {
  71                 // Protect against reentrance, which may occur, for example, when
  72                 // fold points are discovered while performing styling and the folding
  73                 // code looks for child lines which may trigger styling.
  74                 performingStyle = true;
  75
  76                 const Sci::Position lengthDoc = pdoc->Length();
  77                 if (end == -1)
  78                         end = lengthDoc;
  79                 const Sci::Position len = end - start;
  80
  81                 PLATFORM_ASSERT(len >= 0);
  82                 PLATFORM_ASSERT(start + len <= lengthDoc);
  83
  84                 int styleStart = 0;
  85                 if (start > 0)
  86                         styleStart = pdoc->StyleAt(start - 1);
  87
  88                 if (len > 0) {
  89                         instance->Lex(start, len, styleStart, pdoc);
  90                         instance->Fold(start, len, styleStart, pdoc);
  91                 }
  92
  93                 performingStyle = false;
  94         }
  95 }
  96
  97 LineEndType LexInterface::LineEndTypesSupported() {
  98         if (instance) {
  99                 return static_cast<LineEndType>(instance->LineEndTypesSupported());
 100         }
 101         return LineEndType::Default;
 102 }
 103
 104 bool LexInterface::UseContainerLexing() const noexcept {
 105         return !instance;
 106 }
 107
 108 ActionDuration::ActionDuration(double duration_, double minDuration_, double maxDuration_) noexcept :
 109         duration(duration_), minDuration(minDuration_), maxDuration(maxDuration_) {
 110 }
 111
 112 void ActionDuration::AddSample(size_t numberActions, double durationOfActions) noexcept {
 113         // Only adjust for multiple actions to avoid instability
 114         if (numberActions < 8)
 115                 return;
 116
 117         // Alpha value for exponential smoothing.
 118         // Most recent value contributes 25% to smoothed value.
 119         constexpr double alpha = 0.25;
 120
 121         const double durationOne = durationOfActions / numberActions;
 122         duration = std::clamp(alpha * durationOne + (1.0 - alpha) * duration,
 123                 minDuration, maxDuration);
 124 }
 125
// Current smoothed estimate of the duration of one action.
double ActionDuration::Duration() const noexcept {
        return duration;
}
 129
 130 size_t ActionDuration::ActionsInAllowedTime(double secondsAllowed) const noexcept {
 131         return std::lround(secondsAllowed / Duration());
 132 }
 133
 134 CharacterExtracted::CharacterExtracted(const unsigned char *charBytes, size_t widthCharBytes) noexcept {
 135         const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 136         if (utf8status & UTF8MaskInvalid) {
 137                 // Treat as invalid and use up just one byte
 138                 character = unicodeReplacementChar;
 139                 widthBytes = 1;
 140         } else {
 141                 character = UnicodeFromUTF8(charBytes);
 142                 widthBytes = utf8status & UTF8MaskWidth;
 143         }
 144 }
 145
 146 Document::Document(DocumentOption options) :
 147         cb(!FlagSet(options, DocumentOption::StylesNone), FlagSet(options, DocumentOption::TextLarge)),
 148         durationStyleOneByte(0.000001, 0.0000001, 0.00001) {
 149         refCount = 0;
 150 #ifdef _WIN32
 151         eolMode = EndOfLine::CrLf;
 152 #else
 153         eolMode = EndOfLine::Lf;
 154 #endif
 155         dbcsCodePage = CpUtf8;
 156         lineEndBitSet = LineEndType::Default;
 157         endStyled = 0;
 158         styleClock = 0;
 159         enteredModification = 0;
 160         enteredStyling = 0;
 161         enteredReadOnlyCount = 0;
 162         insertionSet = false;
 163         tabInChars = 8;
 164         indentInChars = 0;
 165         actualIndentInChars = 8;
 166         useTabs = true;
 167         tabIndents = true;
 168         backspaceUnindents = false;
 169
 170         matchesValid = false;
 171
 172         perLineData[ldMarkers] = std::make_unique<LineMarkers>();
 173         perLineData[ldLevels] = std::make_unique<LineLevels>();
 174         perLineData[ldState] = std::make_unique<LineState>();
 175         perLineData[ldMargin] = std::make_unique<LineAnnotation>();
 176         perLineData[ldAnnotation] = std::make_unique<LineAnnotation>();
 177         perLineData[ldEOLAnnotation] = std::make_unique<LineAnnotation>();
 178
 179         decorations = DecorationListCreate(IsLarge());
 180
 181         cb.SetPerLine(this);
 182         cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
 183 }
 184
 185 Document::~Document() {
 186         for (const WatcherWithUserData &watcher : watchers) {
 187                 watcher.watcher->NotifyDeleted(this, watcher.userData);
 188         }
 189 }
 190
 191 // Increase reference count and return its previous value.
 192 int SCI_METHOD Document::AddRef() noexcept {
 193         return refCount++;
 194 }
 195
 196 // Decrease reference count and return its previous value.
 197 // Delete the document if reference count reaches zero.
 198 int SCI_METHOD Document::Release() {
 199         const int curRefCount = --refCount;
 200         if (curRefCount == 0)
 201                 delete this;
 202         return curRefCount;
 203 }
 204
 205 void Document::Init() {
 206         for (const std::unique_ptr<PerLine> &pl : perLineData) {
 207                 if (pl)
 208                         pl->Init();
 209         }
 210 }
 211
 212 void Document::InsertLine(Sci::Line line) {
 213         for (const std::unique_ptr<PerLine> &pl : perLineData) {
 214                 if (pl)
 215                         pl->InsertLine(line);
 216         }
 217 }
 218
 219 void Document::InsertLines(Sci::Line line, Sci::Line lines) {
 220         for (const auto &pl : perLineData) {
 221                 if (pl)
 222                         pl->InsertLines(line, lines);
 223         }
 224 }
 225
 226 void Document::RemoveLine(Sci::Line line) {
 227         for (const std::unique_ptr<PerLine> &pl : perLineData) {
 228                 if (pl)
 229                         pl->RemoveLine(line);
 230         }
 231 }
 232
 233 LineMarkers *Document::Markers() const noexcept {
 234         return static_cast<LineMarkers *>(perLineData[ldMarkers].get());
 235 }
 236
 237 LineLevels *Document::Levels() const noexcept {
 238         return static_cast<LineLevels *>(perLineData[ldLevels].get());
 239 }
 240
 241 LineState *Document::States() const noexcept {
 242         return static_cast<LineState *>(perLineData[ldState].get());
 243 }
 244
 245 LineAnnotation *Document::Margins() const noexcept {
 246         return static_cast<LineAnnotation *>(perLineData[ldMargin].get());
 247 }
 248
 249 LineAnnotation *Document::Annotations() const noexcept {
 250         return static_cast<LineAnnotation *>(perLineData[ldAnnotation].get());
 251 }
 252
 253 LineAnnotation *Document::EOLAnnotations() const noexcept {
 254         return static_cast<LineAnnotation *>(perLineData[ldEOLAnnotation].get());
 255 }
 256
 257 LineEndType Document::LineEndTypesSupported() const {
 258         if ((CpUtf8 == dbcsCodePage) && pli)
 259                 return pli->LineEndTypesSupported();
 260         else
 261                 return LineEndType::Default;
 262 }
 263
 264 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 265         if (dbcsCodePage != dbcsCodePage_) {
 266                 dbcsCodePage = dbcsCodePage_;
 267                 SetCaseFolder(nullptr);
 268                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 269                 cb.SetUTF8Substance(CpUtf8 == dbcsCodePage);
 270                 ModifiedAt(0);  // Need to restyle whole document
 271                 return true;
 272         } else {
 273                 return false;
 274         }
 275 }
 276
 277 bool Document::SetLineEndTypesAllowed(LineEndType lineEndBitSet_) {
 278         if (lineEndBitSet != lineEndBitSet_) {
 279                 lineEndBitSet = lineEndBitSet_;
 280                 const LineEndType lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 281                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 282                         ModifiedAt(0);
 283                         cb.SetLineEndTypes(lineEndBitSetActive);
 284                         return true;
 285                 } else {
 286                         return false;
 287                 }
 288         } else {
 289                 return false;
 290         }
 291 }
 292
// Mark the current state as the save point in the cell buffer and
// notify that the document is now at its save point.
void Document::SetSavePoint() {
        cb.SetSavePoint();
        NotifySavePoint(true);
}
 297
// Undo all steps of the active tentative sequence, sending the same kind of
// before/after modification notifications as an ordinary undo, then commit
// the tentative state in the cell buffer.
// Does nothing when no tentative sequence is active or when called while
// another modification is already in progress (enteredModification != 0).
void Document::TentativeUndo() {
        if (!TentativeActive())
                return;
        CheckReadOnly();
        if (enteredModification == 0) {
                // Mark that a modification is in progress for the duration of the undo.
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        const bool startSavePoint = cb.IsSavePoint();
                        // Set once any step changes the number of lines.
                        bool multiLine = false;
                        const int steps = cb.TentativeSteps();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        for (int step = 0; step < steps; step++) {
                                const Sci::Line prevLinesTotal = LinesTotal();
                                const Action action = cb.GetUndoStep();
                                // "Before" notification: undoing a removal re-inserts text,
                                // undoing anything else that is not a container action deletes text.
                                if (action.at == ActionType::remove) {
                                        NotifyModified(DocModification(
                                                ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
                                } else if (action.at == ActionType::container) {
                                        DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                } else {
                                        NotifyModified(DocModification(
                                                ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
                                }
                                cb.PerformUndoStep();
                                if (action.at != ActionType::container) {
                                        ModifiedAt(action.position);
                                }

                                // "After" notification flags for this step.
                                ModificationFlags modFlags = ModificationFlags::Undo;
                                // With undo, an insertion action becomes a deletion notification
                                if (action.at == ActionType::remove) {
                                        modFlags |= ModificationFlags::InsertText;
                                } else if (action.at == ActionType::insert) {
                                        modFlags |= ModificationFlags::DeleteText;
                                }
                                if (steps > 1)
                                        modFlags |= ModificationFlags::MultiStepUndoRedo;
                                const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // Only the final step carries the "last step" and "multiline" markers.
                                if (step == steps - 1) {
                                        modFlags |= ModificationFlags::LastStepInUndoRedo;
                                        if (multiLine)
                                                modFlags |= ModificationFlags::MultilineUndoRedo;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                        linesAdded, action.data));
                        }

                        // Report a save point transition if undoing crossed it.
                        const bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);

                        cb.TentativeCommit();
                }
                enteredModification--;
        }
}
 358
 359 int Document::UndoActions() const noexcept {
 360         return cb.UndoActions();
 361 }
 362
// Forward action to the cell buffer's SetUndoSavePoint.
void Document::SetUndoSavePoint(int action) noexcept {
        cb.SetUndoSavePoint(action);
}
 366
 367 int Document::UndoSavePoint() const noexcept {
 368         return cb.UndoSavePoint();
 369 }
 370
// Forward action to the cell buffer's SetUndoDetach.
void Document::SetUndoDetach(int action) noexcept {
        cb.SetUndoDetach(action);
}
 374
 375 int Document::UndoDetach() const noexcept {
 376         return cb.UndoDetach();
 377 }
 378
// Forward action to the cell buffer's SetUndoTentative.
void Document::SetUndoTentative(int action) noexcept {
        cb.SetUndoTentative(action);
}
 382
 383 int Document::UndoTentative() const noexcept {
 384         return cb.UndoTentative();
 385 }
 386
// Forward action to the cell buffer's SetUndoCurrent.
// Unlike the neighbouring setters this one is not declared noexcept.
void Document::SetUndoCurrent(int action) {
        cb.SetUndoCurrent(action);
}
 390
 391 int Document::UndoCurrent() const noexcept {
 392         return cb.UndoCurrent();
 393 }
 394
 395 int Document::UndoActionType(int action) const noexcept {
 396         return cb.UndoActionType(action);
 397 }
 398
 399 Sci::Position Document::UndoActionPosition(int action) const noexcept {
 400         return cb.UndoActionPosition(action);
 401 }
 402
 403 std::string_view Document::UndoActionText(int action) const noexcept {
 404         return cb.UndoActionText(action);
 405 }
 406
// Forward type and position to the cell buffer's PushUndoActionType.
void Document::PushUndoActionType(int type, Sci::Position position) {
        cb.PushUndoActionType(type, position);
}
 410
// Forward length and text to the cell buffer's ChangeLastUndoActionText.
void Document::ChangeLastUndoActionText(size_t length, const char *text) {
        cb.ChangeLastUndoActionText(length, text);
}
 414
 415 int Document::GetMark(Sci::Line line, bool includeChangeHistory) const {
 416         int marksHistory = 0;
 417         if (includeChangeHistory && (line < LinesTotal())) {
 418                 int marksEdition = 0;
 419
 420                 const Sci::Position start = LineStart(line);
 421                 const Sci::Position lineNext = LineStart(line + 1);
 422                 for (Sci::Position position = start; position < lineNext;) {
 423                         const int edition = EditionAt(position);
 424                         if (edition) {
 425                                 marksEdition |= 1 << (edition-1);
 426                         }
 427                         position = EditionEndRun(position);
 428                 }
 429                 const Sci::Position lineEnd = LineEnd(line);
 430                 for (Sci::Position position = start; position <= lineEnd;) {
 431                         marksEdition |= EditionDeletesAt(position);
 432                         position = EditionNextDelete(position);
 433                 }
 434
 435                 /* Bits: RevertedToOrigin, Saved, Modified, RevertedToModified */
 436                 constexpr unsigned int editionShift = static_cast<unsigned int>(MarkerOutline::HistoryRevertedToOrigin);
 437                 marksHistory = marksEdition << editionShift;
 438         }
 439
 440         return marksHistory | Markers()->MarkValue(line);
 441 }
 442
 443 Sci::Line Document::MarkerNext(Sci::Line lineStart, int mask) const noexcept {
 444         return Markers()->MarkerNext(lineStart, mask);
 445 }
 446
 447 int Document::AddMark(Sci::Line line, int markerNum) {
 448         if (line >= 0 && line < LinesTotal()) {
 449                 const int prev = Markers()->AddMark(line, markerNum, LinesTotal());
 450                 const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
 451                 NotifyModified(mh);
 452                 return prev;
 453         } else {
 454                 return -1;
 455         }
 456 }
 457
 458 void Document::AddMarkSet(Sci::Line line, int valueSet) {
 459         if (line < 0 || line >= LinesTotal()) {
 460                 return;
 461         }
 462         unsigned int m = valueSet;
 463         for (int i = 0; m; i++, m >>= 1) {
 464                 if (m & 1)
 465                         Markers()->AddMark(line, i, LinesTotal());
 466         }
 467         const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
 468         NotifyModified(mh);
 469 }
 470
 471 void Document::DeleteMark(Sci::Line line, int markerNum) {
 472         Markers()->DeleteMark(line, markerNum, false);
 473         const DocModification mh(ModificationFlags::ChangeMarker, LineStart(line), 0, 0, nullptr, line);
 474         NotifyModified(mh);
 475 }
 476
 477 void Document::DeleteMarkFromHandle(int markerHandle) {
 478         Markers()->DeleteMarkFromHandle(markerHandle);
 479         DocModification mh(ModificationFlags::ChangeMarker);
 480         mh.line = -1;
 481         NotifyModified(mh);
 482 }
 483
 484 void Document::DeleteAllMarks(int markerNum) {
 485         bool someChanges = false;
 486         for (Sci::Line line = 0; line < LinesTotal(); line++) {
 487                 if (Markers()->DeleteMark(line, markerNum, true))
 488                         someChanges = true;
 489         }
 490         if (someChanges) {
 491                 DocModification mh(ModificationFlags::ChangeMarker);
 492                 mh.line = -1;
 493                 NotifyModified(mh);
 494         }
 495 }
 496
 497 Sci::Line Document::LineFromHandle(int markerHandle) const noexcept {
 498         return Markers()->LineFromHandle(markerHandle);
 499 }
 500
 501 int Document::MarkerNumberFromLine(Sci::Line line, int which) const noexcept {
 502         return Markers()->NumberFromLine(line, which);
 503 }
 504
 505 int Document::MarkerHandleFromLine(Sci::Line line, int which) const noexcept {
 506         return Markers()->HandleFromLine(line, which);
 507 }
 508
 509 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
 510         return cb.LineStart(line);
 511 }
 512
 513 Range Document::LineRange(Sci::Line line) const noexcept {
 514         return {cb.LineStart(line), cb.LineStart(line + 1)};
 515 }
 516
 517 bool Document::IsLineStartPosition(Sci::Position position) const noexcept {
 518         return LineStartPosition(position) == position;
 519 }
 520
 521 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
 522         return cb.LineEnd(line);
 523 }
 524
// Version identifier of this document interface; always deRelease0.
int SCI_METHOD Document::DEVersion() const noexcept {
        return deRelease0;
}
 528
 529 void SCI_METHOD Document::SetErrorStatus(int status) {
 530         // Tell the watchers an error has occurred.
 531         for (const WatcherWithUserData &watcher : watchers) {
 532                 watcher.watcher->NotifyErrorOccurred(this, watcher.userData, static_cast<Status>(status));
 533         }
 534 }
 535
 536 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
 537         return cb.LineFromPosition(pos);
 538 }
 539
 540 Sci::Line Document::SciLineFromPosition(Sci::Position pos) const noexcept {
 541         // Avoids casting in callers for this very common function
 542         return cb.LineFromPosition(pos);
 543 }
 544
 545 Sci::Position Document::LineStartPosition(Sci::Position position) const noexcept {
 546         return cb.LineStart(cb.LineFromPosition(position));
 547 }
 548
 549 Sci::Position Document::LineEndPosition(Sci::Position position) const noexcept {
 550         return cb.LineEnd(cb.LineFromPosition(position));
 551 }
 552
 553 bool Document::IsLineEndPosition(Sci::Position position) const noexcept {
 554         return LineEndPosition(position) == position;
 555 }
 556
 557 bool Document::IsPositionInLineEnd(Sci::Position position) const noexcept {
 558         return position >= LineEndPosition(position);
 559 }
 560
 561 Sci::Position Document::VCHomePosition(Sci::Position position) const {
 562         const Sci::Line line = SciLineFromPosition(position);
 563         const Sci::Position startPosition = LineStart(line);
 564         const Sci::Position endLine = LineEnd(line);
 565         Sci::Position startText = startPosition;
 566         while (startText < endLine && IsSpaceOrTab(cb.CharAt(startText)))
 567                 startText++;
 568         if (position == startText)
 569                 return startPosition;
 570         else
 571                 return startText;
 572 }
 573
 574 Sci::Position Document::IndexLineStart(Sci::Line line, LineCharacterIndexType lineCharacterIndex) const noexcept {
 575         return cb.IndexLineStart(line, lineCharacterIndex);
 576 }
 577
 578 Sci::Line Document::LineFromPositionIndex(Sci::Position pos, LineCharacterIndexType lineCharacterIndex) const noexcept {
 579         return cb.LineFromPositionIndex(pos, lineCharacterIndex);
 580 }
 581
 582 Sci::Line Document::LineFromPositionAfter(Sci::Line line, Sci::Position length) const noexcept {
 583         const Sci::Position posAfter = cb.LineStart(line) + length;
 584         if (posAfter >= LengthNoExcept()) {
 585                 return LinesTotal();
 586         }
 587         const Sci::Line lineAfter = SciLineFromPosition(posAfter);
 588         if (lineAfter > line) {
 589                 return lineAfter;
 590         } else {
 591                 // Want to make some progress so return next line
 592                 return lineAfter + 1;
 593         }
 594 }
 595
 596 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
 597         const int prev = Levels()->SetLevel(line, level, LinesTotal());
 598         if (prev != level) {
 599                 DocModification mh(ModificationFlags::ChangeFold | ModificationFlags::ChangeMarker,
 600                                    LineStart(line), 0, 0, nullptr, line);
 601                 mh.foldLevelNow = static_cast<FoldLevel>(level);
 602                 mh.foldLevelPrev = static_cast<FoldLevel>(prev);
 603                 NotifyModified(mh);
 604         }
 605         return prev;
 606 }
 607
 608 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
 609         return Levels()->GetLevel(line);
 610 }
 611
 612 FoldLevel Document::GetFoldLevel(Sci_Position line) const noexcept {
 613         return Levels()->GetFoldLevel(line);
 614 }
 615
// Forward to the level store's ClearLevels. No notification is sent from here.
void Document::ClearLevels() {
        Levels()->ClearLevels();
}
 619
 620 static bool IsSubordinate(FoldLevel levelStart, FoldLevel levelTry) noexcept {
 621         if (LevelIsWhitespace(levelTry))
 622                 return true;
 623         else
 624                 return LevelNumber(levelStart) < LevelNumber(levelTry);
 625 }
 626
 627 Sci::Line Document::GetLastChild(Sci::Line lineParent, std::optional<FoldLevel> level, Sci::Line lastLine) {
 628         const FoldLevel levelStart = LevelNumberPart(level ? *level : GetFoldLevel(lineParent));
 629         const Sci::Line maxLine = LinesTotal();
 630         const Sci::Line lookLastLine = (lastLine != -1) ? std::min(LinesTotal() - 1, lastLine) : -1;
 631         Sci::Line lineMaxSubord = lineParent;
 632         while (lineMaxSubord < maxLine - 1) {
 633                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 634                 if (!IsSubordinate(levelStart, GetFoldLevel(lineMaxSubord + 1)))
 635                         break;
 636                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !LevelIsWhitespace(GetFoldLevel(lineMaxSubord)))
 637                         break;
 638                 lineMaxSubord++;
 639         }
 640         if (lineMaxSubord > lineParent) {
 641                 if (levelStart > LevelNumberPart(GetFoldLevel(lineMaxSubord + 1))) {
 642                         // Have chewed up some whitespace that belongs to a parent so seek back
 643                         if (LevelIsWhitespace(GetFoldLevel(lineMaxSubord))) {
 644                                 lineMaxSubord--;
 645                         }
 646                 }
 647         }
 648         return lineMaxSubord;
 649 }
 650
 651 Sci::Line Document::GetFoldParent(Sci::Line line) const noexcept {
 652         return Levels()->GetFoldParent(line);
 653 }
 654
// Fill highlightDelimiter with the fold block around line: the lines where the
// block begins and ends, plus the first "changeable" line before and after line.
// lastLine is passed on (as max(line, lastLine) + 1) to GetLastChild as its search limit.
// highlightDelimiter is cleared, and nothing else is set, when no enclosing
// fold block is found.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, Sci::Line line, Sci::Line lastLine) {
        const FoldLevel level = GetFoldLevel(line);
        const Sci::Line lookLastLine = std::max(line, lastLine) + 1;

        // Walk upwards past whitespace lines, and past header lines whose level
        // number is not lower than that of the line following them.
        Sci::Line lookLine = line;
        FoldLevel lookLineLevel = level;
        FoldLevel lookLineLevelNum = LevelNumberPart(lookLineLevel);
        while ((lookLine > 0) && (LevelIsWhitespace(lookLineLevel) ||
                (LevelIsHeader(lookLineLevel) && (lookLineLevelNum >= LevelNumberPart(GetFoldLevel(lookLine + 1)))))) {
                lookLineLevel = GetFoldLevel(--lookLine);
                lookLineLevelNum = LevelNumberPart(lookLineLevel);
        }

        // The block begins at the line reached if it is a header, otherwise at its fold parent.
        Sci::Line beginFoldBlock = LevelIsHeader(lookLineLevel) ? lookLine : GetFoldParent(lookLine);
        if (beginFoldBlock == -1) {
                highlightDelimiter.Clear();
                return;
        }

        Sci::Line endFoldBlock = GetLastChild(beginFoldBlock, {}, lookLastLine);
        Sci::Line firstChangeableLineBefore = -1;
        if (endFoldBlock < line) {
                // The block found ends before line: search further up for a header
                // whose last child is exactly line.
                lookLine = beginFoldBlock - 1;
                lookLineLevel = GetFoldLevel(lookLine);
                lookLineLevelNum = LevelNumberPart(lookLineLevel);
                while ((lookLine >= 0) && (lookLineLevelNum >= FoldLevel::Base)) {
                        if (LevelIsHeader(lookLineLevel)) {
                                if (GetLastChild(lookLine, {}, lookLastLine) == line) {
                                        beginFoldBlock = lookLine;
                                        endFoldBlock = line;
                                        firstChangeableLineBefore = line - 1;
                                }
                        }
                        // Stop at a base-level line that is preceded by a deeper line.
                        if ((lookLine > 0) && (lookLineLevelNum == FoldLevel::Base) && (LevelNumberPart(GetFoldLevel(lookLine - 1)) > lookLineLevelNum))
                                break;
                        lookLineLevel = GetFoldLevel(--lookLine);
                        lookLineLevelNum = LevelNumberPart(lookLineLevel);
                }
        }
        if (firstChangeableLineBefore == -1) {
                // Scan from the line above line back to the block start for the nearest
                // line that is whitespace or has a higher level number than line.
                for (lookLine = line - 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
                        lookLine >= beginFoldBlock;
                        lookLineLevel = GetFoldLevel(--lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
                        if (LevelIsWhitespace(lookLineLevel) || (lookLineLevelNum > LevelNumberPart(level))) {
                                firstChangeableLineBefore = lookLine;
                                break;
                        }
                }
        }
        // Nothing found inside the block: use the line just before it.
        if (firstChangeableLineBefore == -1)
                firstChangeableLineBefore = beginFoldBlock - 1;

        // Scan from the line below line to the block end for the nearest header
        // whose level number is lower than that of the line following it.
        Sci::Line firstChangeableLineAfter = -1;
        for (lookLine = line + 1, lookLineLevel = GetFoldLevel(lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel);
                lookLine <= endFoldBlock;
                lookLineLevel = GetFoldLevel(++lookLine), lookLineLevelNum = LevelNumberPart(lookLineLevel)) {
                if (LevelIsHeader(lookLineLevel) && (lookLineLevelNum < LevelNumberPart(GetFoldLevel(lookLine + 1)))) {
                        firstChangeableLineAfter = lookLine;
                        break;
                }
        }
        // Nothing found inside the block: use the line just after it.
        if (firstChangeableLineAfter == -1)
                firstChangeableLineAfter = endFoldBlock + 1;

        highlightDelimiter.beginFoldBlock = beginFoldBlock;
        highlightDelimiter.endFoldBlock = endFoldBlock;
        highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
        highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
 724
 725 Sci::Position Document::ClampPositionIntoDocument(Sci::Position pos) const noexcept {
 726         return std::clamp<Sci::Position>(pos, 0, LengthNoExcept());
 727 }
 728
 729 bool Document::IsCrLf(Sci::Position pos) const noexcept {
 730         if (pos < 0)
 731                 return false;
 732         if (pos >= (LengthNoExcept() - 1))
 733                 return false;
 734         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 735 }
 736
 737 int Document::LenChar(Sci::Position pos) const noexcept {
 738         if (pos < 0 || pos >= LengthNoExcept()) {
 739                 // Returning 1 instead of 0 to defend against hanging with a loop that goes (or starts) out of bounds.
 740                 return 1;
 741         } else if (IsCrLf(pos)) {
 742                 return 2;
 743         }
 744
 745         const unsigned char leadByte = cb.UCharAt(pos);
 746         if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
 747                 // Common case: ASCII character
 748                 return 1;
 749         }
 750         if (CpUtf8 == dbcsCodePage) {
 751                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 752                 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
 753                 for (int b = 1; b < widthCharBytes; b++) {
 754                         charBytes[b] = cb.UCharAt(pos + b);
 755                 }
 756                 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 757                 if (utf8status & UTF8MaskInvalid) {
 758                         // Treat as invalid and use up just one byte
 759                         return 1;
 760                 } else {
 761                         return utf8status & UTF8MaskWidth;
 762                 }
 763         } else {
 764                 if (IsDBCSLeadByteNoExcept(leadByte) && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1))) {
 765                         return 2;
 766                 } else {
 767                         return 1;
 768                 }
 769         }
 770 }
 771
 772 bool Document::InGoodUTF8(Sci::Position pos, Sci::Position &start, Sci::Position &end) const noexcept {
 773         Sci::Position trail = pos;
 774         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(cb.UCharAt(trail-1)))
 775                 trail--;
 776         start = (trail > 0) ? trail-1 : trail;
 777
 778         const unsigned char leadByte = cb.UCharAt(start);
 779         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 780         if (widthCharBytes == 1) {
 781                 return false;
 782         } else {
 783                 const int trailBytes = widthCharBytes - 1;
 784                 const Sci::Position len = pos - start;
 785                 if (len > trailBytes)
 786                         // pos too far from lead
 787                         return false;
 788                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 789                 for (Sci::Position b=1; b<widthCharBytes && ((start+b) < cb.Length()); b++)
 790                         charBytes[b] = cb.CharAt(start+b);
 791                 const int utf8status = UTF8Classify(charBytes, widthCharBytes);
 792                 if (utf8status & UTF8MaskInvalid)
 793                         return false;
 794                 end = start + widthCharBytes;
 795                 return true;
 796         }
 797 }
 798
// Normalise a position so that it is not part way through a multi-byte character.
// This can occur in two situations -
// When lines are terminated with \r\n pairs which should be treated as one character.
// When displaying DBCS text such as Japanese.
// If moving, move the position in the indicated direction.
//
// pos: position to normalise; values at or beyond either end of the document return 0 or the document length.
// moveDir: when pos is inside a character, a value > 0 returns the position after the character,
//          any other value returns the position of its start.
// checkLineEnd: when true, a position between the \r and the \n of a line end is moved as well.
// Returns pos itself when it is already between characters.
Sci::Position Document::MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir, bool checkLineEnd) const noexcept {
	//Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
	// If out of range, just return minimum/maximum value.
	if (pos <= 0)
		return 0;
	if (pos >= LengthNoExcept())
		return LengthNoExcept();

	// PLATFORM_ASSERT(pos > 0 && pos < LengthNoExcept());
	// pos - 1 is the \r when pos sits between the two bytes of a \r\n pair.
	if (checkLineEnd && IsCrLf(pos - 1)) {
		if (moveDir > 0)
			return pos + 1;
		else
			return pos - 1;
	}

	if (dbcsCodePage) {
		if (CpUtf8 == dbcsCodePage) {
			const unsigned char ch = cb.UCharAt(pos);
			// If ch is not a trail byte then pos is valid intercharacter position
			if (UTF8IsTrailByte(ch)) {
				Sci::Position startUTF = pos;
				Sci::Position endUTF = pos;
				if (InGoodUTF8(pos, startUTF, endUTF)) {
					// ch is a trail byte within a UTF-8 character
					if (moveDir > 0)
						pos = endUTF;
					else
						pos = startUTF;
				}
				// Else invalid UTF-8 so return position of isolated trail byte
			}
		} else {
			// Anchor DBCS calculations at start of line because start of line can
			// not be a DBCS trail byte.
			const Sci::Position posStartLine = LineStartPosition(pos);
			if (pos == posStartLine)
				return pos;

			// Step back until a non-lead-byte is found.
			Sci::Position posCheck = pos;
			while ((posCheck > posStartLine) && IsDBCSLeadByteNoExcept(cb.CharAt(posCheck-1)))
				posCheck--;

			// Check from known start of character.
			// Walk forward one character (1 or 2 bytes) at a time until reaching or passing pos.
			while (posCheck < pos) {
				const int mbsize = IsDBCSDualByteAt(posCheck) ? 2 : 1;
				if (posCheck + mbsize == pos) {
					// A character ends exactly at pos so pos is a boundary.
					return pos;
				} else if (posCheck + mbsize > pos) {
					// pos is inside the character starting at posCheck.
					if (moveDir > 0) {
						return posCheck + mbsize;
					} else {
						return posCheck;
					}
				}
				posCheck += mbsize;
			}
		}
	}

	return pos;
}
 867
// NextPosition moves between valid positions - it can not handle a position in the middle of a
// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
// A \r\n pair is treated as two characters.
//
// pos: a position on a character boundary.
// moveDir: > 0 moves forward one character, any other value moves backward one character.
// Returns the neighbouring position, or 0 / the document length when a single byte step
// would reach or pass the corresponding end of the document.
Sci::Position Document::NextPosition(Sci::Position pos, int moveDir) const noexcept {
	// If out of range, just return minimum/maximum value.
	const int increment = (moveDir > 0) ? 1 : -1;
	if (pos + increment <= 0)
		return 0;
	if (pos + increment >= cb.Length())
		return cb.Length();

	if (dbcsCodePage) {
		if (CpUtf8 == dbcsCodePage) {
			if (increment == 1) {
				// Simple forward movement case so can avoid some checks
				const unsigned char leadByte = cb.UCharAt(pos);
				if (UTF8IsAscii(leadByte)) {
					// Single byte character or invalid
					pos++;
				} else {
					// Gather the bytes implied by the lead byte and classify the sequence.
					const int widthCharBytes = UTF8BytesOfLead[leadByte];
					unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
					for (int b=1; b<widthCharBytes; b++)
						charBytes[b] = cb.CharAt(pos+b);
					const int utf8status = UTF8Classify(charBytes, widthCharBytes);
					// Invalid sequences advance a single byte, valid ones by their width.
					if (utf8status & UTF8MaskInvalid)
						pos++;
					else
						pos += utf8status & UTF8MaskWidth;
				}
			} else {
				// Examine byte before position
				pos--;
				const unsigned char ch = cb.UCharAt(pos);
				// If ch is not a trail byte then pos is valid intercharacter position
				if (UTF8IsTrailByte(ch)) {
					// If ch is a trail byte in a valid UTF-8 character then return start of character
					Sci::Position startUTF = pos;
					Sci::Position endUTF = pos;
					if (InGoodUTF8(pos, startUTF, endUTF)) {
						pos = startUTF;
					}
					// Else invalid UTF-8 so return position of isolated trail byte
				}
			}
		} else {
			if (moveDir > 0) {
				// Forward in DBCS: step over a lead+trail pair or a single byte, clamped to the document end.
				const int mbsize = IsDBCSDualByteAt(pos) ? 2 : 1;
				pos += mbsize;
				if (pos > cb.Length())
					pos = cb.Length();
			} else {
				// Anchor DBCS calculations at start of line because start of line can
				// not be a DBCS trail byte.
				const Sci::Position posStartLine = LineStartPosition(pos);
				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
				if ((pos - 1) <= posStartLine) {
					// At most one byte between the line start and pos so step back one byte.
					return pos - 1;
				} else if (IsDBCSLeadByteNoExcept(cb.CharAt(pos - 1))) {
					// Should actually be trail byte
					if (IsDBCSDualByteAt(pos - 2)) {
						return pos - 2;
					} else {
						// Invalid byte pair so treat as one byte wide
						return pos - 1;
					}
				} else {
					// Otherwise, step back until a non-lead-byte is found.
					Sci::Position posTemp = pos - 1;
					while (posStartLine <= --posTemp && IsDBCSLeadByteNoExcept(cb.CharAt(posTemp)))
						;
					// Now posTemp+1 must point to the beginning of a character,
					// so figure out whether we went back an even or an odd
					// number of bytes and go back 1 or 2 bytes, respectively.
					const Sci::Position widthLast = ((pos - posTemp) & 1) + 1;
					if ((widthLast == 2) && (IsDBCSDualByteAt(pos - widthLast))) {
						return pos - widthLast;
					}
					// Byte before pos may be valid character or may be an invalid second byte
					return pos - 1;
				}
			}
		}
	} else {
		// Single byte encoding: every byte is a character.
		pos += increment;
	}

	return pos;
}
 958
 959 bool Document::NextCharacter(Sci::Position &pos, int moveDir) const noexcept {
 960         // Returns true if pos changed
 961         Sci::Position posNext = NextPosition(pos, moveDir);
 962         if (posNext == pos) {
 963                 return false;
 964         } else {
 965                 pos = posNext;
 966                 return true;
 967         }
 968 }
 969
 970 CharacterExtracted Document::CharacterAfter(Sci::Position position) const noexcept {
 971         if (position >= LengthNoExcept()) {
 972                 return CharacterExtracted(unicodeReplacementChar, 0);
 973         }
 974         const unsigned char leadByte = cb.UCharAt(position);
 975         if (!dbcsCodePage || UTF8IsAscii(leadByte)) {
 976                 // Common case: ASCII character
 977                 return CharacterExtracted(leadByte, 1);
 978         }
 979         if (CpUtf8 == dbcsCodePage) {
 980                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 981                 unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
 982                 for (int b = 1; b<widthCharBytes; b++)
 983                         charBytes[b] = cb.UCharAt(position + b);
 984                 return CharacterExtracted(charBytes, widthCharBytes);
 985         } else {
 986                 if (IsDBCSLeadByteNoExcept(leadByte)) {
 987                         const unsigned char trailByte = cb.UCharAt(position + 1);
 988                         if (IsDBCSTrailByteNoExcept(trailByte)) {
 989                                 return CharacterExtracted::DBCS(leadByte, trailByte);
 990                         }
 991                 }
 992                 return CharacterExtracted(leadByte, 1);
 993         }
 994 }
 995
 996 CharacterExtracted Document::CharacterBefore(Sci::Position position) const noexcept {
 997         if (position <= 0) {
 998                 return CharacterExtracted(unicodeReplacementChar, 0);
 999         }
1000         const unsigned char previousByte = cb.UCharAt(position - 1);
1001         if (0 == dbcsCodePage) {
1002                 return CharacterExtracted(previousByte, 1);
1003         }
1004         if (CpUtf8 == dbcsCodePage) {
1005                 if (UTF8IsAscii(previousByte)) {
1006                         return CharacterExtracted(previousByte, 1);
1007                 }
1008                 position--;
1009                 // If previousByte is not a trail byte then its invalid
1010                 if (UTF8IsTrailByte(previousByte)) {
1011                         // If previousByte is a trail byte in a valid UTF-8 character then find start of character
1012                         Sci::Position startUTF = position;
1013                         Sci::Position endUTF = position;
1014                         if (InGoodUTF8(position, startUTF, endUTF)) {
1015                                 const Sci::Position widthCharBytes = endUTF - startUTF;
1016                                 unsigned char charBytes[UTF8MaxBytes] = { 0, 0, 0, 0 };
1017                                 for (Sci::Position b = 0; b<widthCharBytes; b++)
1018                                         charBytes[b] = cb.UCharAt(startUTF + b);
1019                                 return CharacterExtracted(charBytes, widthCharBytes);
1020                         }
1021                         // Else invalid UTF-8 so return position of isolated trail byte
1022                 }
1023                 return CharacterExtracted(unicodeReplacementChar, 1);
1024         } else {
1025                 // Moving backwards in DBCS is complex so use NextPosition
1026                 const Sci::Position posStartCharacter = NextPosition(position, -1);
1027                 return CharacterAfter(posStartCharacter);
1028         }
1029 }
1030
1031 // Return -1  on out-of-bounds
1032 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
1033         Sci::Position pos = positionStart;
1034         if (dbcsCodePage) {
1035                 const int increment = (characterOffset > 0) ? 1 : -1;
1036                 while (characterOffset != 0) {
1037                         const Sci::Position posNext = NextPosition(pos, increment);
1038                         if (posNext == pos)
1039                                 return Sci::invalidPosition;
1040                         pos = posNext;
1041                         characterOffset -= increment;
1042                 }
1043         } else {
1044                 pos = positionStart + characterOffset;
1045                 if ((pos < 0) || (pos > Length()))
1046                         return Sci::invalidPosition;
1047         }
1048         return pos;
1049 }
1050
1051 Sci::Position Document::GetRelativePositionUTF16(Sci::Position positionStart, Sci::Position characterOffset) const noexcept {
1052         Sci::Position pos = positionStart;
1053         if (dbcsCodePage) {
1054                 const int increment = (characterOffset > 0) ? 1 : -1;
1055                 while (characterOffset != 0) {
1056                         const Sci::Position posNext = NextPosition(pos, increment);
1057                         if (posNext == pos)
1058                                 return Sci::invalidPosition;
1059                         if (std::abs(pos-posNext) > 3)  // 4 byte character = 2*UTF16.
1060                                 characterOffset -= increment;
1061                         pos = posNext;
1062                         characterOffset -= increment;
1063                 }
1064         } else {
1065                 pos = positionStart + characterOffset;
1066                 if ((pos < 0) || (pos > LengthNoExcept()))
1067                         return Sci::invalidPosition;
1068         }
1069         return pos;
1070 }
1071
1072 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
1073         int bytesInCharacter = 1;
1074         const unsigned char leadByte = cb.UCharAt(position);
1075         int character = leadByte;
1076         if (dbcsCodePage && !UTF8IsAscii(leadByte)) {
1077                 if (CpUtf8 == dbcsCodePage) {
1078                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
1079                         unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
1080                         for (int b=1; b<widthCharBytes; b++)
1081                                 charBytes[b] = cb.UCharAt(position+b);
1082                         const int utf8status = UTF8Classify(charBytes, widthCharBytes);
1083                         if (utf8status & UTF8MaskInvalid) {
1084                                 // Report as singleton surrogate values which are invalid Unicode
1085                                 character =  0xDC80 + leadByte;
1086                         } else {
1087                                 bytesInCharacter = utf8status & UTF8MaskWidth;
1088                                 character = UnicodeFromUTF8(charBytes);
1089                         }
1090                 } else {
1091                         if (IsDBCSLeadByteNoExcept(leadByte)) {
1092                                 const unsigned char trailByte = cb.UCharAt(position + 1);
1093                                 if (IsDBCSTrailByteNoExcept(trailByte)) {
1094                                         bytesInCharacter = 2;
1095                                         character = (leadByte << 8) | trailByte;
1096                                 }
1097                         }
1098                 }
1099         }
1100         if (pWidth) {
1101                 *pWidth = bytesInCharacter;
1102         }
1103         return character;
1104 }
1105
// Return the document's code page setting (dbcsCodePage).
// Elsewhere in this file 0 is handled as a single byte encoding, CpUtf8 as UTF-8
// and other non-zero values as DBCS.
int SCI_METHOD Document::CodePage() const {
	return dbcsCodePage;
}
1109
// Report whether ch is a lead byte in the document's DBCS code page.
// Forwards to IsDBCSLeadByteNoExcept which holds the byte ranges.
bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
	// Used by lexers so must match IDocument method exactly
	return IsDBCSLeadByteNoExcept(ch);
}
1114
1115 bool Document::IsDBCSLeadByteNoExcept(char ch) const noexcept {
1116         // Used inside core Scintilla
1117         // Byte ranges found in Wikipedia articles with relevant search strings in each case
1118         const unsigned char uch = ch;
1119         switch (dbcsCodePage) {
1120                 case 932:
1121                         // Shift_jis
1122                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
1123                                 ((uch >= 0xE0) && (uch <= 0xFC));
1124                                 // Lead bytes F0 to FC may be a Microsoft addition.
1125                 case 936:
1126                         // GBK
1127                         return (uch >= 0x81) && (uch <= 0xFE);
1128                 case 949:
1129                         // Korean Wansung KS C-5601-1987
1130                         return (uch >= 0x81) && (uch <= 0xFE);
1131                 case 950:
1132                         // Big5
1133                         return (uch >= 0x81) && (uch <= 0xFE);
1134                 case 1361:
1135                         // Korean Johab KS C-5601-1992
1136                         return
1137                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
1138                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
1139                                 ((uch >= 0xE0) && (uch <= 0xF9));
1140         }
1141         return false;
1142 }
1143
1144 bool Document::IsDBCSTrailByteNoExcept(char ch) const noexcept {
1145         const unsigned char trail = ch;
1146         switch (dbcsCodePage) {
1147         case 932:
1148                 // Shift_jis
1149                 return (trail != 0x7F) &&
1150                         ((trail >= 0x40) && (trail <= 0xFC));
1151         case 936:
1152                 // GBK
1153                 return (trail != 0x7F) &&
1154                         ((trail >= 0x40) && (trail <= 0xFE));
1155         case 949:
1156                 // Korean Wansung KS C-5601-1987
1157                 return
1158                         ((trail >= 0x41) && (trail <= 0x5A)) ||
1159                         ((trail >= 0x61) && (trail <= 0x7A)) ||
1160                         ((trail >= 0x81) && (trail <= 0xFE));
1161         case 950:
1162                 // Big5
1163                 return
1164                         ((trail >= 0x40) && (trail <= 0x7E)) ||
1165                         ((trail >= 0xA1) && (trail <= 0xFE));
1166         case 1361:
1167                 // Korean Johab KS C-5601-1992
1168                 return
1169                         ((trail >= 0x31) && (trail <= 0x7E)) ||
1170                         ((trail >= 0x81) && (trail <= 0xFE));
1171         }
1172         return false;
1173 }
1174
1175 int Document::DBCSDrawBytes(std::string_view text) const noexcept {
1176         if (text.length() <= 1) {
1177                 return static_cast<int>(text.length());
1178         }
1179         if (IsDBCSLeadByteNoExcept(text[0])) {
1180                 return IsDBCSTrailByteNoExcept(text[1]) ? 2 : 1;
1181         } else {
1182                 return 1;
1183         }
1184 }
1185
1186 bool Document::IsDBCSDualByteAt(Sci::Position pos) const noexcept {
1187         return IsDBCSLeadByteNoExcept(cb.CharAt(pos))
1188                 && IsDBCSTrailByteNoExcept(cb.CharAt(pos + 1));
1189 }
1190
1191 // Need to break text into segments near end but taking into account the
1192 // encoding to not break inside a UTF-8 or DBCS character and also trying
1193 // to avoid breaking inside a pair of combining characters, or inside
1194 // ligatures.
1195 // TODO: implement grapheme cluster boundaries,
1196 // see https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries.
1197 //
1198 // The segment length must always be long enough (more than 4 bytes)
1199 // so that there will be at least one whole character to make a segment.
1200 // For UTF-8, text must consist only of valid whole characters.
1201 // In preference order from best to worst:
1202 //   1) Break before or after spaces or controls
1203 //   2) Break at word and punctuation boundary for better kerning and ligature support
1204 //   3) Break after whole character, this may break combining characters
1205
1206 size_t Document::SafeSegment(std::string_view text) const noexcept {
1207         // check space first as most written language use spaces.
1208         for (std::string_view::iterator it = text.end() - 1; it != text.begin(); --it) {
1209                 if (IsBreakSpace(*it)) {
1210                         return it - text.begin();
1211                 }
1212         }
1213
1214         if (!dbcsCodePage || dbcsCodePage == CpUtf8) {
1215                 // backward iterate for UTF-8 and single byte encoding to find word and punctuation boundary.
1216                 std::string_view::iterator it = text.end() - 1;
1217                 const bool punctuation = IsPunctuation(*it);
1218                 do {
1219                         --it;
1220                         if (punctuation != IsPunctuation(*it)) {
1221                                 return it - text.begin() + 1;
1222                         }
1223                 } while (it != text.begin());
1224
1225                 it = text.end() - 1;
1226                 if (dbcsCodePage) {
1227                         // for UTF-8 go back to the start of last character.
1228                         for (int trail = 0; trail < UTF8MaxBytes - 1 && UTF8IsTrailByte(*it); trail++) {
1229                                 --it;
1230                         }
1231                 }
1232                 return it - text.begin();
1233         }
1234
1235         {
1236                 // forward iterate for DBCS to find word and punctuation boundary.
1237                 size_t lastPunctuationBreak = 0;
1238                 size_t lastEncodingAllowedBreak = 0;
1239                 CharacterClass ccPrev = CharacterClass::space;
1240                 for (size_t j = 0; j < text.length();) {
1241                         const unsigned char ch = text[j];
1242                         lastEncodingAllowedBreak = j++;
1243
1244                         CharacterClass cc = CharacterClass::word;
1245                         if (UTF8IsAscii(ch)) {
1246                                 if (IsPunctuation(ch)) {
1247                                         cc = CharacterClass::punctuation;
1248                                 }
1249                         } else {
1250                                 j += IsDBCSLeadByteNoExcept(ch);
1251                         }
1252                         if (cc != ccPrev) {
1253                                 ccPrev = cc;
1254                                 lastPunctuationBreak = lastEncodingAllowedBreak;
1255                         }
1256                 }
1257                 return lastPunctuationBreak ? lastPunctuationBreak : lastEncodingAllowedBreak;
1258         }
1259 }
1260
1261 EncodingFamily Document::CodePageFamily() const noexcept {
1262         if (CpUtf8 == dbcsCodePage)
1263                 return EncodingFamily::unicode;
1264         else if (dbcsCodePage)
1265                 return EncodingFamily::dbcs;
1266         else
1267                 return EncodingFamily::eightBit;
1268 }
1269
1270 void Document::ModifiedAt(Sci::Position pos) noexcept {
1271         if (endStyled > pos)
1272                 endStyled = pos;
1273 }
1274
1275 void Document::CheckReadOnly() {
1276         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
1277                 enteredReadOnlyCount++;
1278                 NotifyModifyAttempt();
1279                 enteredReadOnlyCount--;
1280         }
1281 }
1282
1283 void Document::TrimReplacement(std::string_view &text, Range &range) const noexcept {
1284         while (!text.empty() && !range.Empty() && (text.front() == CharAt(range.start))) {
1285                 text.remove_prefix(1);
1286                 range.start++;
1287         }
1288         while (!text.empty() && !range.Empty() && (text.back() == CharAt(range.end-1))) {
1289                 text.remove_suffix(1);
1290                 range.end--;
1291         }
1292 }
1293
1294 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
1295 // SetStyleAt does not change the persistent state of a document
1296
1297 bool Document::DeleteChars(Sci::Position pos, Sci::Position len) {
1298         if (pos < 0)
1299                 return false;
1300         if (len <= 0)
1301                 return false;
1302         if ((pos + len) > LengthNoExcept())
1303                 return false;
1304         CheckReadOnly();
1305         if (enteredModification != 0) {
1306                 return false;
1307         } else {
1308                 enteredModification++;
1309                 if (!cb.IsReadOnly()) {
1310                         NotifyModified(
1311                             DocModification(
1312                                 ModificationFlags::BeforeDelete | ModificationFlags::User,
1313                                 pos, len,
1314                                 0, nullptr));
1315                         const Sci::Line prevLinesTotal = LinesTotal();
1316                         const bool startSavePoint = cb.IsSavePoint();
1317                         bool startSequence = false;
1318                         const char *text = cb.DeleteChars(pos, len, startSequence);
1319                         if (startSavePoint && cb.IsCollectingUndo())
1320                                 NotifySavePoint(false);
1321                         if ((pos < LengthNoExcept()) || (pos == 0))
1322                                 ModifiedAt(pos);
1323                         else
1324                                 ModifiedAt(pos-1);
1325                         NotifyModified(
1326                             DocModification(
1327                                 ModificationFlags::DeleteText | ModificationFlags::User |
1328                                         (startSequence?ModificationFlags::StartAction:ModificationFlags::None),
1329                                 pos, len,
1330                                 LinesTotal() - prevLinesTotal, text));
1331                 }
1332                 enteredModification--;
1333         }
1334         return !cb.IsReadOnly();
1335 }
1336
/**
 * Insert a string with a length.
 * Sends InsertCheck, BeforeInsert and InsertText notifications in that order.
 * A handler of InsertCheck may substitute the text through ChangeInsertion.
 * @param position where to insert.
 * @param s bytes to insert.
 * @param insertLength number of bytes of s to insert.
 * @return the number of bytes inserted, which is the substituted length when the text
 *  was replaced, or 0 when nothing was inserted (non-positive length, read-only
 *  buffer, or called while another modification is in progress).
 */
Sci::Position Document::InsertString(Sci::Position position, const char *s, Sci::Position insertLength) {
	if (insertLength <= 0) {
		return 0;
	}
	CheckReadOnly();	// Application may change read only state here
	if (cb.IsReadOnly()) {
		return 0;
	}
	if (enteredModification != 0) {
		return 0;
	}
	enteredModification++;
	// Reset substitution state before giving handlers the chance to set it.
	insertionSet = false;
	insertion.clear();
	NotifyModified(
		DocModification(
			ModificationFlags::InsertCheck,
			position, insertLength,
			0, s));
	if (insertionSet) {
		// ChangeInsertion was called: insert the substituted text instead.
		s = insertion.c_str();
		insertLength = insertion.length();
	}
	NotifyModified(
		DocModification(
			ModificationFlags::BeforeInsert | ModificationFlags::User,
			position, insertLength,
			0, s));
	const Sci::Line prevLinesTotal = LinesTotal();
	const bool startSavePoint = cb.IsSavePoint();
	bool startSequence = false;
	const char *text = cb.InsertString(position, s, insertLength, startSequence);
	// Leaving the save point is only reported when undo is being collected.
	if (startSavePoint && cb.IsCollectingUndo())
		NotifySavePoint(false);
	ModifiedAt(position);
	NotifyModified(
		DocModification(
			ModificationFlags::InsertText | ModificationFlags::User |
			(startSequence?ModificationFlags::StartAction:ModificationFlags::None),
			position, insertLength,
			LinesTotal() - prevLinesTotal, text));
	if (insertionSet) {	// Free memory as could be large
		std::string().swap(insertion);
	}
	enteredModification--;
	return insertLength;
}
1387
1388 Sci::Position Document::InsertString(Sci::Position position, std::string_view sv) {
1389         return InsertString(position, sv.data(), sv.length());
1390 }
1391
1392 void Document::ChangeInsertion(const char *s, Sci::Position length) {
1393         insertionSet = true;
1394         insertion.assign(s, length);
1395 }
1396
1397 int SCI_METHOD Document::AddData(const char *data, Sci_Position length) {
1398         try {
1399                 const Sci::Position position = Length();
1400                 InsertString(position, data, length);
1401         } catch (std::bad_alloc &) {
1402                 return static_cast<int>(Status::BadAlloc);
1403         } catch (...) {
1404                 return static_cast<int>(Status::Failure);
1405         }
1406         return static_cast<int>(Status::Ok);
1407 }
1408
1409 IDocumentEditable *Document::AsDocumentEditable() noexcept {
1410         return static_cast<IDocumentEditable *>(this);
1411 }
1412
1413 void *SCI_METHOD Document::ConvertToDocument() {
1414         return AsDocumentEditable();
1415 }
1416
1417 Sci::Position Document::Undo() {
1418         Sci::Position newPos = -1;
1419         CheckReadOnly();
1420         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1421                 enteredModification++;
1422                 if (!cb.IsReadOnly()) {
1423                         const bool startSavePoint = cb.IsSavePoint();
1424                         bool multiLine = false;
1425                         const int steps = cb.StartUndo();
1426                         //Platform::DebugPrintf("Steps=%d\n", steps);
1427                         Range coalescedRemove;  // Default is empty at 0
1428                         for (int step = 0; step < steps; step++) {
1429                                 const Sci::Line prevLinesTotal = LinesTotal();
1430                                 const Action action = cb.GetUndoStep();
1431                                 if (action.at == ActionType::remove) {
1432                                         NotifyModified(DocModification(
1433                                                                         ModificationFlags::BeforeInsert | ModificationFlags::Undo, action));
1434                                 } else if (action.at == ActionType::container) {
1435                                         DocModification dm(ModificationFlags::Container | ModificationFlags::Undo);
1436                                         dm.token = action.position;
1437                                         NotifyModified(dm);
1438                                 } else {
1439                                         NotifyModified(DocModification(
1440                                                                         ModificationFlags::BeforeDelete | ModificationFlags::Undo, action));
1441                                 }
1442                                 cb.PerformUndoStep();
1443                                 if (action.at != ActionType::container) {
1444                                         ModifiedAt(action.position);
1445                                         newPos = action.position;
1446                                 }
1447
1448                                 ModificationFlags modFlags = ModificationFlags::Undo;
1449                                 // With undo, an insertion action becomes a deletion notification
1450                                 if (action.at == ActionType::remove) {
1451                                         newPos += action.lenData;
1452                                         modFlags |= ModificationFlags::InsertText;
1453                                         if (coalescedRemove.Contains(action.position)) {
1454                                                 coalescedRemove.end += action.lenData;
1455                                                 newPos = coalescedRemove.end;
1456                                         } else {
1457                                                 coalescedRemove = Range(action.position, action.position + action.lenData);
1458                                         }
1459                                 } else if (action.at == ActionType::insert) {
1460                                         modFlags |= ModificationFlags::DeleteText;
1461                                         coalescedRemove = Range();
1462                                 }
1463                                 if (steps > 1)
1464                                         modFlags |= ModificationFlags::MultiStepUndoRedo;
1465                                 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1466                                 if (linesAdded != 0)
1467                                         multiLine = true;
1468                                 if (step == steps - 1) {
1469                                         modFlags |= ModificationFlags::LastStepInUndoRedo;
1470                                         if (multiLine)
1471                                                 modFlags |= ModificationFlags::MultilineUndoRedo;
1472                                 }
1473                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
1474                                                                                            linesAdded, action.data));
1475                         }
1476
1477                         const bool endSavePoint = cb.IsSavePoint();
1478                         if (startSavePoint != endSavePoint)
1479                                 NotifySavePoint(endSavePoint);
1480                 }
1481                 enteredModification--;
1482         }
1483         return newPos;
1484 }
1485
1486 Sci::Position Document::Redo() {
1487         Sci::Position newPos = -1;
1488         CheckReadOnly();
1489         if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
1490                 enteredModification++;
1491                 if (!cb.IsReadOnly()) {
1492                         const bool startSavePoint = cb.IsSavePoint();
1493                         bool multiLine = false;
1494                         const int steps = cb.StartRedo();
1495                         for (int step = 0; step < steps; step++) {
1496                                 const Sci::Line prevLinesTotal = LinesTotal();
1497                                 const Action action = cb.GetRedoStep();
1498                                 if (action.at == ActionType::insert) {
1499                                         NotifyModified(DocModification(
1500                                                                         ModificationFlags::BeforeInsert | ModificationFlags::Redo, action));
1501                                 } else if (action.at == ActionType::container) {
1502                                         DocModification dm(ModificationFlags::Container | ModificationFlags::Redo);
1503                                         dm.token = action.position;
1504                                         NotifyModified(dm);
1505                                 } else {
1506                                         NotifyModified(DocModification(
1507                                                                         ModificationFlags::BeforeDelete | ModificationFlags::Redo, action));
1508                                 }
1509                                 cb.PerformRedoStep();
1510                                 if (action.at != ActionType::container) {
1511                                         ModifiedAt(action.position);
1512                                         newPos = action.position;
1513                                 }
1514
1515                                 ModificationFlags modFlags = ModificationFlags::Redo;
1516                                 if (action.at == ActionType::insert) {
1517                                         newPos += action.lenData;
1518                                         modFlags |= ModificationFlags::InsertText;
1519                                 } else if (action.at == ActionType::remove) {
1520                                         modFlags |= ModificationFlags::DeleteText;
1521                                 }
1522                                 if (steps > 1)
1523                                         modFlags |= ModificationFlags::MultiStepUndoRedo;
1524                                 const Sci::Line linesAdded = LinesTotal() - prevLinesTotal;
1525                                 if (linesAdded != 0)
1526                                         multiLine = true;
1527                                 if (step == steps - 1) {
1528                                         modFlags |= ModificationFlags::LastStepInUndoRedo;
1529                                         if (multiLine)
1530                                                 modFlags |= ModificationFlags::MultilineUndoRedo;
1531                                 }
1532                                 NotifyModified(
1533                                         DocModification(modFlags, action.position, action.lenData,
1534                                                                         linesAdded, action.data));
1535                         }
1536
1537                         const bool endSavePoint = cb.IsSavePoint();
1538                         if (startSavePoint != endSavePoint)
1539                                 NotifySavePoint(endSavePoint);
1540                 }
1541                 enteredModification--;
1542         }
1543         return newPos;
1544 }
1545
1546 int Document::UndoSequenceDepth() const noexcept {
1547         return cb.UndoSequenceDepth();
1548 }
1549
1550 void Document::DelChar(Sci::Position pos) {
1551         DeleteChars(pos, LenChar(pos));
1552 }
1553
1554 void Document::DelCharBack(Sci::Position pos) {
1555         if (pos <= 0) {
1556                 return;
1557         } else if (IsCrLf(pos - 2)) {
1558                 DeleteChars(pos - 2, 2);
1559         } else if (dbcsCodePage) {
1560                 const Sci::Position startChar = NextPosition(pos, -1);
1561                 DeleteChars(startChar, pos - startChar);
1562         } else {
1563                 DeleteChars(pos - 1, 1);
1564         }
1565 }
1566
1567 static constexpr Sci::Position NextTab(Sci::Position pos, Sci::Position tabSize) noexcept {
1568         return ((pos / tabSize) + 1) * tabSize;
1569 }
1570
1571 static std::string CreateIndentation(Sci::Position indent, int tabSize, bool insertSpaces) {
1572         std::string indentation;
1573         if (!insertSpaces) {
1574                 while (indent >= tabSize) {
1575                         indentation += '\t';
1576                         indent -= tabSize;
1577                 }
1578         }
1579         while (indent > 0) {
1580                 indentation += ' ';
1581                 indent--;
1582         }
1583         return indentation;
1584 }
1585
1586 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1587         int indent = 0;
1588         if ((line >= 0) && (line < LinesTotal())) {
1589                 const Sci::Position lineStart = LineStart(line);
1590                 const Sci::Position length = Length();
1591                 for (Sci::Position i = lineStart; i < length; i++) {
1592                         const char ch = cb.CharAt(i);
1593                         if (ch == ' ')
1594                                 indent++;
1595                         else if (ch == '\t')
1596                                 indent = static_cast<int>(NextTab(indent, tabInChars));
1597                         else
1598                                 return indent;
1599                 }
1600         }
1601         return indent;
1602 }
1603
1604 Sci::Position Document::SetLineIndentation(Sci::Line line, Sci::Position indent) {
1605         const int indentOfLine = GetLineIndentation(line);
1606         if (indent < 0)
1607                 indent = 0;
1608         if (indent != indentOfLine) {
1609                 const std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1610                 const Sci::Position thisLineStart = LineStart(line);
1611                 const Sci::Position indentPos = GetLineIndentPosition(line);
1612                 UndoGroup ug(this);
1613                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1614                 return thisLineStart + InsertString(thisLineStart, linebuf);
1615         } else {
1616                 return GetLineIndentPosition(line);
1617         }
1618 }
1619
1620 Sci::Position Document::GetLineIndentPosition(Sci::Line line) const {
1621         if (line < 0)
1622                 return 0;
1623         Sci::Position pos = LineStart(line);
1624         const Sci::Position length = Length();
1625         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1626                 pos++;
1627         }
1628         return pos;
1629 }
1630
1631 Sci::Position Document::GetColumn(Sci::Position pos) const {
1632         Sci::Position column = 0;
1633         const Sci::Line line = SciLineFromPosition(pos);
1634         if ((line >= 0) && (line < LinesTotal())) {
1635                 for (Sci::Position i = LineStart(line); i < pos;) {
1636                         const char ch = cb.CharAt(i);
1637                         if (ch == '\t') {
1638                                 column = NextTab(column, tabInChars);
1639                                 i++;
1640                         } else if (ch == '\r') {
1641                                 return column;
1642                         } else if (ch == '\n') {
1643                                 return column;
1644                         } else if (i >= Length()) {
1645                                 return column;
1646                         } else if (UTF8IsAscii(ch)) {
1647                                 column++;
1648                                 i++;
1649                         } else {
1650                                 column++;
1651                                 i = NextPosition(i, 1);
1652                         }
1653                 }
1654         }
1655         return column;
1656 }
1657
1658 Sci::Position Document::CountCharacters(Sci::Position startPos, Sci::Position endPos) const noexcept {
1659         startPos = MovePositionOutsideChar(startPos, 1, false);
1660         endPos = MovePositionOutsideChar(endPos, -1, false);
1661         Sci::Position count = 0;
1662         Sci::Position i = startPos;
1663         while (i < endPos) {
1664                 count++;
1665                 i = NextPosition(i, 1);
1666         }
1667         return count;
1668 }
1669
1670 Sci::Position Document::CountUTF16(Sci::Position startPos, Sci::Position endPos) const noexcept {
1671         startPos = MovePositionOutsideChar(startPos, 1, false);
1672         endPos = MovePositionOutsideChar(endPos, -1, false);
1673         Sci::Position count = 0;
1674         Sci::Position i = startPos;
1675         while (i < endPos) {
1676                 count++;
1677                 const Sci::Position next = NextPosition(i, 1);
1678                 if ((next - i) > 3)
1679                         count++;
1680                 i = next;
1681         }
1682         return count;
1683 }
1684
1685 Sci::Position Document::FindColumn(Sci::Line line, Sci::Position column) {
1686         Sci::Position position = LineStart(line);
1687         if ((line >= 0) && (line < LinesTotal())) {
1688                 Sci::Position columnCurrent = 0;
1689                 while ((columnCurrent < column) && (position < Length())) {
1690                         const char ch = cb.CharAt(position);
1691                         if (ch == '\t') {
1692                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1693                                 if (columnCurrent > column)
1694                                         return position;
1695                                 position++;
1696                         } else if (ch == '\r') {
1697                                 return position;
1698                         } else if (ch == '\n') {
1699                                 return position;
1700                         } else {
1701                                 columnCurrent++;
1702                                 position = NextPosition(position, 1);
1703                         }
1704                 }
1705         }
1706         return position;
1707 }
1708
1709 void Document::Indent(bool forwards, Sci::Line lineBottom, Sci::Line lineTop) {
1710         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1711         for (Sci::Line line = lineBottom; line >= lineTop; line--) {
1712                 const Sci::Position indentOfLine = GetLineIndentation(line);
1713                 if (forwards) {
1714                         if (LineStart(line) < LineEnd(line)) {
1715                                 SetLineIndentation(line, indentOfLine + IndentSize());
1716                         }
1717                 } else {
1718                         SetLineIndentation(line, indentOfLine - IndentSize());
1719                 }
1720         }
1721 }
1722
1723 namespace {
1724
1725 constexpr std::string_view EOLForMode(EndOfLine eolMode) noexcept {
1726         switch (eolMode) {
1727         case EndOfLine::CrLf:
1728                 return "\r\n";
1729         case EndOfLine::Cr:
1730                 return "\r";
1731         default:
1732                 return "\n";
1733         }
1734 }
1735
1736 }
1737
1738 // Convert line endings for a piece of text to a particular mode.
1739 // Stop at len or when a NUL is found.
1740 std::string Document::TransformLineEnds(const char *s, size_t len, EndOfLine eolModeWanted) {
1741         std::string dest;
1742         const std::string_view eol = EOLForMode(eolModeWanted);
1743         for (size_t i = 0; (i < len) && (s[i]); i++) {
1744                 if (s[i] == '\n' || s[i] == '\r') {
1745                         dest.append(eol);
1746                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1747                                 i++;
1748                         }
1749                 } else {
1750                         dest.push_back(s[i]);
1751                 }
1752         }
1753         return dest;
1754 }
1755
1756 void Document::ConvertLineEnds(EndOfLine eolModeSet) {
1757         UndoGroup ug(this);
1758
1759         for (Sci::Position pos = 0; pos < Length(); pos++) {
1760                 const char ch = cb.CharAt(pos);
1761                 if (ch == '\r') {
1762                         if (cb.CharAt(pos + 1) == '\n') {
1763                                 // CRLF
1764                                 if (eolModeSet == EndOfLine::Cr) {
1765                                         DeleteChars(pos + 1, 1); // Delete the LF
1766                                 } else if (eolModeSet == EndOfLine::Lf) {
1767                                         DeleteChars(pos, 1); // Delete the CR
1768                                 } else {
1769                                         pos++;
1770                                 }
1771                         } else {
1772                                 // CR
1773                                 if (eolModeSet == EndOfLine::CrLf) {
1774                                         pos += InsertString(pos + 1, "\n", 1); // Insert LF
1775                                 } else if (eolModeSet == EndOfLine::Lf) {
1776                                         pos += InsertString(pos, "\n", 1); // Insert LF
1777                                         DeleteChars(pos, 1); // Delete CR
1778                                         pos--;
1779                                 }
1780                         }
1781                 } else if (ch == '\n') {
1782                         // LF
1783                         if (eolModeSet == EndOfLine::CrLf) {
1784                                 pos += InsertString(pos, "\r", 1); // Insert CR
1785                         } else if (eolModeSet == EndOfLine::Cr) {
1786                                 pos += InsertString(pos, "\r", 1); // Insert CR
1787                                 DeleteChars(pos, 1); // Delete LF
1788                                 pos--;
1789                         }
1790                 }
1791         }
1792
1793 }
1794
1795 std::string_view Document::EOLString() const noexcept {
1796         return EOLForMode(eolMode);
1797 }
1798
1799 DocumentOption Document::Options() const noexcept {
1800         return (IsLarge() ? DocumentOption::TextLarge : DocumentOption::Default) |
1801                 (cb.HasStyles() ? DocumentOption::Default : DocumentOption::StylesNone);
1802 }
1803
1804 bool Document::IsWhiteLine(Sci::Line line) const {
1805         Sci::Position currentChar = LineStart(line);
1806         const Sci::Position endLine = LineEnd(line);
1807         while (currentChar < endLine) {
1808                 if (!IsSpaceOrTab(cb.CharAt(currentChar))) {
1809                         return false;
1810                 }
1811                 ++currentChar;
1812         }
1813         return true;
1814 }
1815
1816 Sci::Position Document::ParaUp(Sci::Position pos) const {
1817         Sci::Line line = SciLineFromPosition(pos);
1818         const Sci::Position start = LineStart(line);
1819         if (pos == start) {
1820                 line--;
1821         }
1822         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1823                 line--;
1824         }
1825         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1826                 line--;
1827         }
1828         line++;
1829         return LineStart(line);
1830 }
1831
1832 Sci::Position Document::ParaDown(Sci::Position pos) const {
1833         Sci::Line line = SciLineFromPosition(pos);
1834         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1835                 line++;
1836         }
1837         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1838                 line++;
1839         }
1840         if (line < LinesTotal())
1841                 return LineStart(line);
1842         else // end of a document
1843                 return LineEnd(line-1);
1844 }
1845
1846 CharacterClass Document::WordCharacterClass(unsigned int ch) const {
1847         if (dbcsCodePage && (ch >= 0x80)) {
1848                 if (CpUtf8 == dbcsCodePage) {
1849                         // Use hard coded Unicode class
1850                         const CharacterCategory cc = charMap.CategoryFor(ch);
1851                         switch (cc) {
1852
1853                                 // Separator, Line/Paragraph
1854                         case ccZl:
1855                         case ccZp:
1856                                 return CharacterClass::newLine;
1857
1858                                 // Separator, Space
1859                         case ccZs:
1860                                 // Other
1861                         case ccCc:
1862                         case ccCf:
1863                         case ccCs:
1864                         case ccCo:
1865                         case ccCn:
1866                                 return CharacterClass::space;
1867
1868                                 // Letter
1869                         case ccLu:
1870                         case ccLl:
1871                         case ccLt:
1872                         case ccLm:
1873                         case ccLo:
1874                                 // Number
1875                         case ccNd:
1876                         case ccNl:
1877                         case ccNo:
1878                                 // Mark - includes combining diacritics
1879                         case ccMn:
1880                         case ccMc:
1881                         case ccMe:
1882                                 return CharacterClass::word;
1883
1884                                 // Punctuation
1885                         case ccPc:
1886                         case ccPd:
1887                         case ccPs:
1888                         case ccPe:
1889                         case ccPi:
1890                         case ccPf:
1891                         case ccPo:
1892                                 // Symbol
1893                         case ccSm:
1894                         case ccSc:
1895                         case ccSk:
1896                         case ccSo:
1897                                 return CharacterClass::punctuation;
1898
1899                         }
1900                 } else {
1901                         // Asian DBCS
1902                         return CharacterClass::word;
1903                 }
1904         }
1905         return charClass.GetClass(static_cast<unsigned char>(ch));
1906 }
1907
1908 /**
1909  * Used by commands that want to select whole words.
1910  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1911  */
1912 Sci::Position Document::ExtendWordSelect(Sci::Position pos, int delta, bool onlyWordCharacters) const {
1913         CharacterClass ccStart = CharacterClass::word;
1914         if (delta < 0) {
1915                 if (!onlyWordCharacters) {
1916                         const CharacterExtracted ce = CharacterBefore(pos);
1917                         ccStart = WordCharacterClass(ce.character);
1918                 }
1919                 while (pos > 0) {
1920                         const CharacterExtracted ce = CharacterBefore(pos);
1921                         if (WordCharacterClass(ce.character) != ccStart)
1922                                 break;
1923                         pos -= ce.widthBytes;
1924                 }
1925         } else {
1926                 if (!onlyWordCharacters && pos < LengthNoExcept()) {
1927                         const CharacterExtracted ce = CharacterAfter(pos);
1928                         ccStart = WordCharacterClass(ce.character);
1929                 }
1930                 while (pos < LengthNoExcept()) {
1931                         const CharacterExtracted ce = CharacterAfter(pos);
1932                         if (WordCharacterClass(ce.character) != ccStart)
1933                                 break;
1934                         pos += ce.widthBytes;
1935                 }
1936         }
1937         return MovePositionOutsideChar(pos, delta, true);
1938 }
1939
1940 /**
1941  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1942  * (delta < 0).
1943  * This is looking for a transition between character classes although there is also some
1944  * additional movement to transit white space.
1945  * Used by cursor movement by word commands.
1946  */
1947 Sci::Position Document::NextWordStart(Sci::Position pos, int delta) const {
1948         if (delta < 0) {
1949                 while (pos > 0) {
1950                         const CharacterExtracted ce = CharacterBefore(pos);
1951                         if (WordCharacterClass(ce.character) != CharacterClass::space)
1952                                 break;
1953                         pos -= ce.widthBytes;
1954                 }
1955                 if (pos > 0) {
1956                         CharacterExtracted ce = CharacterBefore(pos);
1957                         const CharacterClass ccStart = WordCharacterClass(ce.character);
1958                         while (pos > 0) {
1959                                 ce = CharacterBefore(pos);
1960                                 if (WordCharacterClass(ce.character) != ccStart)
1961                                         break;
1962                                 pos -= ce.widthBytes;
1963                         }
1964                 }
1965         } else {
1966                 CharacterExtracted ce = CharacterAfter(pos);
1967                 const CharacterClass ccStart = WordCharacterClass(ce.character);
1968                 while (pos < LengthNoExcept()) {
1969                         ce = CharacterAfter(pos);
1970                         if (WordCharacterClass(ce.character) != ccStart)
1971                                 break;
1972                         pos += ce.widthBytes;
1973                 }
1974                 while (pos < LengthNoExcept()) {
1975                         ce = CharacterAfter(pos);
1976                         if (WordCharacterClass(ce.character) != CharacterClass::space)
1977                                 break;
1978                         pos += ce.widthBytes;
1979                 }
1980         }
1981         return pos;
1982 }
1983
1984 /**
1985  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1986  * (delta < 0).
1987  * This is looking for a transition between character classes although there is also some
1988  * additional movement to transit white space.
1989  * Used by cursor movement by word commands.
1990  */
1991 Sci::Position Document::NextWordEnd(Sci::Position pos, int delta) const {
1992         if (delta < 0) {
1993                 if (pos > 0) {
1994                         CharacterExtracted ce = CharacterBefore(pos);
1995                         const CharacterClass ccStart = WordCharacterClass(ce.character);
1996                         if (ccStart != CharacterClass::space) {
1997                                 while (pos > 0) {
1998                                         ce = CharacterBefore(pos);
1999                                         if (WordCharacterClass(ce.character) != ccStart)
2000                                                 break;
2001                                         pos -= ce.widthBytes;
2002                                 }
2003                         }
2004                         while (pos > 0) {
2005                                 ce = CharacterBefore(pos);
2006                                 if (WordCharacterClass(ce.character) != CharacterClass::space)
2007                                         break;
2008                                 pos -= ce.widthBytes;
2009                         }
2010                 }
2011         } else {
2012                 while (pos < LengthNoExcept()) {
2013                         const CharacterExtracted ce = CharacterAfter(pos);
2014                         if (WordCharacterClass(ce.character) != CharacterClass::space)
2015                                 break;
2016                         pos += ce.widthBytes;
2017                 }
2018                 if (pos < LengthNoExcept()) {
2019                         CharacterExtracted ce = CharacterAfter(pos);
2020                         const CharacterClass ccStart = WordCharacterClass(ce.character);
2021                         while (pos < LengthNoExcept()) {
2022                                 ce = CharacterAfter(pos);
2023                                 if (WordCharacterClass(ce.character) != ccStart)
2024                                         break;
2025                                 pos += ce.widthBytes;
2026                         }
2027                 }
2028         }
2029         return pos;
2030 }
2031
2032 namespace {
2033
2034 constexpr bool IsWordEdge(CharacterClass cc, CharacterClass ccNext) noexcept {
2035         return (cc != ccNext) &&
2036                 (cc == CharacterClass::word || cc == CharacterClass::punctuation);
2037 }
2038
2039 }
2040
2041 /**
2042  * Check that the character at the given position is a word or punctuation character and that
2043  * the previous character is of a different character class.
2044  */
2045 bool Document::IsWordStartAt(Sci::Position pos) const {
2046         if (pos >= LengthNoExcept())
2047                 return false;
2048         if (pos >= 0) {
2049                 const CharacterExtracted cePos = CharacterAfter(pos);
2050                 // At start of document, treat as if space before so can be word start
2051                 const CharacterExtracted cePrev = (pos > 0) ?
2052                         CharacterBefore(pos) : CharacterExtracted(' ', 1);
2053                 return IsWordEdge(WordCharacterClass(cePos.character), WordCharacterClass(cePrev.character));
2054         }
2055         return true;
2056 }
2057
2058 /**
2059  * Check that the character before the given position is a word or punctuation character and that
2060  * the next character is of a different character class.
2061  */
2062 bool Document::IsWordEndAt(Sci::Position pos) const {
2063         if (pos <= 0)
2064                 return false;
2065         if (pos <= LengthNoExcept()) {
2066                 // At end of document, treat as if space after so can be word end
2067                 const CharacterExtracted cePos = (pos < LengthNoExcept()) ?
2068                         CharacterAfter(pos) : CharacterExtracted(' ', 1);
2069                 const CharacterExtracted cePrev = CharacterBefore(pos);
2070                 return IsWordEdge(WordCharacterClass(cePrev.character), WordCharacterClass(cePos.character));
2071         }
2072         return true;
2073 }
2074
2075 /**
2076  * Check that the given range is has transitions between character classes at both
2077  * ends and where the characters on the inside are word or punctuation characters.
2078  */
2079 bool Document::IsWordAt(Sci::Position start, Sci::Position end) const {
2080         return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
2081 }
2082
2083 bool Document::MatchesWordOptions(bool word, bool wordStart, Sci::Position pos, Sci::Position length) const {
2084         return (!word && !wordStart) ||
2085                         (word && IsWordAt(pos, pos + length)) ||
2086                         (wordStart && IsWordStartAt(pos));
2087 }
2088
/**
 * Report whether a case folder is currently held in pcf.
 * The case-insensitive branches of FindText dereference pcf without checking it.
 */
bool Document::HasCaseFolder() const noexcept {
        return pcf != nullptr;
}
2092
/**
 * Take ownership of pcf_ as the document's case folder, replacing whatever pcf held before.
 * Passing an empty pointer leaves the document without a case folder.
 */
void Document::SetCaseFolder(std::unique_ptr<CaseFolder> pcf_) noexcept {
        pcf = std::move(pcf_);
}
2096
2097 CharacterExtracted Document::ExtractCharacter(Sci::Position position) const noexcept {
2098         const unsigned char leadByte = cb.UCharAt(position);
2099         if (UTF8IsAscii(leadByte)) {
2100                 // Common case: ASCII character
2101                 return CharacterExtracted(leadByte, 1);
2102         }
2103         const int widthCharBytes = UTF8BytesOfLead[leadByte];
2104         unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
2105         for (int b=1; b<widthCharBytes; b++)
2106                 charBytes[b] = cb.UCharAt(position + b);
2107         return CharacterExtracted(charBytes, widthCharBytes);
2108 }
2109
2110 namespace {
2111
2112 // Equivalent of memchr over the split view
2113 ptrdiff_t SplitFindChar(const SplitView &view, size_t start, size_t length, int ch) noexcept {
2114         size_t range1Length = 0;
2115         if (start < view.length1) {
2116                 range1Length = std::min(length, view.length1 - start);
2117                 const char *match = static_cast<const char *>(memchr(view.segment1 + start, ch, range1Length));
2118                 if (match) {
2119                         return match - view.segment1;
2120                 }
2121                 start += range1Length;
2122         }
2123         const char *match2 = static_cast<const char *>(memchr(view.segment2 + start, ch, length - range1Length));
2124         if (match2) {
2125                 return match2 - view.segment2;
2126         }
2127         return -1;
2128 }
2129
2130 // Equivalent of memcmp over the split view
2131 // This does not call memcmp as search texts are commonly too short to overcome the
2132 // call overhead.
2133 bool SplitMatch(const SplitView &view, size_t start, std::string_view text) noexcept {
2134         for (size_t i = 0; i < text.length(); i++) {
2135                 if (view.CharAt(i + start) != text[i]) {
2136                         return false;
2137                 }
2138         }
2139         return true;
2140 }
2141
2142 }
2143
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos Position where the search starts.
 * @param maxPos Position where the search stops.
 * @param search Bytes to look for; *length of them are used.
 * @param flags FindOption bits; MatchCase, WholeWord, WordStart and RegExp are examined here.
 * @param length In: number of bytes in search. The case-insensitive UTF-8 and DBCS paths
 *        overwrite it with the number of document bytes matched; the case-sensitive and
 *        single-byte case-insensitive paths leave it unchanged. Regular expression
 *        searches pass it on to the regex object.
 * @return Position of the match, -1 when nothing matched, or minPos when *length is not
 *         positive. For regular expression searches the result of regex->FindText.
 */
Sci::Position Document::FindText(Sci::Position minPos, Sci::Position maxPos, const char *search,
                        FindOption flags, Sci::Position *length) {
        // An empty search trivially "matches" at the starting position
        if (*length <= 0)
                return minPos;
        const bool caseSensitive = FlagSet(flags, FindOption::MatchCase);
        const bool word = FlagSet(flags, FindOption::WholeWord);
        const bool wordStart = FlagSet(flags, FindOption::WordStart);
        const bool regExp = FlagSet(flags, FindOption::RegExp);
        if (regExp) {
                // The regex engine is created on first use and then kept for later searches
                if (!regex)
                        regex = std::unique_ptr<RegexSearchBase>(CreateRegexSearch(&charClass));
                return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
        } else {

                const bool forward = minPos <= maxPos;
                const int increment = forward ? 1 : -1;

                // Range endpoints should not be inside DBCS characters, but just in case, move them.
                const Sci::Position startPos = MovePositionOutsideChar(minPos, increment, false);
                const Sci::Position endPos = MovePositionOutsideChar(maxPos, increment, false);

                // Compute actual search ranges needed
                const Sci::Position lengthFind = *length;

                //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
                // Upper bound of the searched range whichever direction is used: no match may extend past it
                const Sci::Position limitPos = std::max(startPos, endPos);
                Sci::Position pos = startPos;
                if (!forward) {
                        // Back all of a character
                        pos = NextPosition(pos, increment);
                }
                const SplitView cbView = cb.AllView();
                if (caseSensitive) {
                        // Forward: one past the last position where a whole match still fits before endPos.
                        // Backward: the loop runs down to and including endPos.
                        const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        const unsigned char charStartSearch =  search[0];
                        if (forward && ((0 == dbcsCodePage) || (CpUtf8 == dbcsCodePage && !UTF8IsTrailByte(charStartSearch)))) {
                                // This is a fast case where there is no need to test byte values to iterate
                                // so becomes the equivalent of a memchr+memcmp loop.
                                // UTF-8 search will not be self-synchronizing when starts with trail byte
                                const std::string_view suffix(search + 1, lengthFind - 1);
                                while (pos < endSearch) {
                                        // Jump straight to the next occurrence of the first search byte
                                        pos = SplitFindChar(cbView, pos, limitPos - pos, charStartSearch);
                                        if (pos < 0) {
                                                break;
                                        }
                                        if (SplitMatch(cbView, pos + 1, suffix) && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                                return pos;
                                        }
                                        pos++;
                                }
                        } else {
                                // General case-sensitive loop: compare bytes at each character start,
                                // stepping by whole characters when a multi-byte code page is active
                                while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                        const unsigned char leadByte = cbView.CharAt(pos);
                                        if (leadByte == charStartSearch) {
                                                bool found = (pos + lengthFind) <= limitPos;
                                                // SplitMatch could be called here but it is slower with g++ -O2
                                                for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
                                                        found = cbView.CharAt(pos + indexSearch) == search[indexSearch];
                                                }
                                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                                        return pos;
                                                }
                                        }
                                        if (forward && UTF8IsAscii(leadByte)) {
                                                pos++;
                                        } else {
                                                if (dbcsCodePage) {
                                                        // Stop when the position can not be moved any further
                                                        if (!NextCharacter(pos, increment)) {
                                                                break;
                                                        }
                                                } else {
                                                        pos += increment;
                                                }
                                        }
                                }
                        }
                } else if (CpUtf8 == dbcsCodePage) {
                        // Case-insensitive UTF-8: fold the search text once, then fold document
                        // characters one at a time and compare against the folded search bytes.
                        // NOTE(review): pcf is dereferenced without a null check - presumably callers
                        // ensure a case folder has been set; confirm.
                        constexpr size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing((lengthFind+1) * UTF8MaxBytes * maxFoldingExpansion + 1);
                        const size_t lenSearch =
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int widthFirstCharacter = 1;
                                // posIndexDocument walks the document, indexSearch walks the folded search text
                                Sci::Position posIndexDocument = pos;
                                size_t indexSearch = 0;
                                bool characterMatches = true;
                                while (indexSearch < lenSearch) {
                                        const unsigned char leadByte = cbView.CharAt(posIndexDocument);
                                        int widthChar = 1;
                                        size_t lenFlat = 1;
                                        if (UTF8IsAscii(leadByte)) {
                                                // Candidate would run past the search range
                                                if ((posIndexDocument + 1) > limitPos) {
                                                        break;
                                                }
                                                characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
                                        } else {
                                                char bytes[UTF8MaxBytes]{ static_cast<char>(leadByte) };
                                                const int widthCharBytes = UTF8BytesOfLead[leadByte];
                                                for (int b = 1; b < widthCharBytes; b++) {
                                                        bytes[b] = cbView.CharAt(posIndexDocument + b);
                                                }
                                                // The classified width is used from here on; it can differ from
                                                // the width implied by the lead byte alone
                                                widthChar = UTF8Classify(bytes, widthCharBytes) & UTF8MaskWidth;
                                                if (!indexSearch) {     // First character
                                                        widthFirstCharacter = widthChar;
                                                }
                                                if ((posIndexDocument + widthChar) > limitPos) {
                                                        break;
                                                }
                                                char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
                                                lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
                                                // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
                                                assert((indexSearch + lenFlat) <= searchThing.size());
                                                // Does folded match the buffer
                                                characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        }
                                        if (!characterMatches) {
                                                break;
                                        }
                                        posIndexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                // A match requires every folded search byte to have been consumed
                                if (characterMatches && (indexSearch == lenSearch)) {
                                        if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
                                                // Report the matched document length, which can differ from the search length
                                                *length = posIndexDocument - pos;
                                                return pos;
                                        }
                                }
                                if (forward) {
                                        pos += widthFirstCharacter;
                                } else {
                                        if (!NextCharacter(pos, increment)) {
                                                break;
                                        }
                                }
                        }
                } else if (dbcsCodePage) {
                        // Case-insensitive DBCS: same scheme as the UTF-8 branch with characters of 1 or 2 bytes
                        constexpr size_t maxBytesCharacter = 2;
                        constexpr size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing((lengthFind+1) * maxBytesCharacter * maxFoldingExpansion + 1);
                        const size_t lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                // 0 means the width of the first character has not been determined yet
                                int widthFirstCharacter = 0;
                                // indexDocument is an offset from pos, indexSearch an index into the folded search text
                                Sci::Position indexDocument = 0;
                                size_t indexSearch = 0;
                                bool characterMatches = true;
                                while (((pos + indexDocument) < limitPos) &&
                                        (indexSearch < lenSearch)) {
                                        const unsigned char leadByte = cbView.CharAt(pos + indexDocument);
                                        const int widthChar = (!UTF8IsAscii(leadByte) && IsDBCSLeadByteNoExcept(leadByte)) ? 2 : 1;
                                        if (!widthFirstCharacter) {
                                                widthFirstCharacter = widthChar;
                                        }
                                        if ((pos + indexDocument + widthChar) > limitPos) {
                                                break;
                                        }
                                        size_t lenFlat = 1;
                                        if (widthChar == 1) {
                                                characterMatches = searchThing[indexSearch] == MakeLowerCase(leadByte);
                                        } else {
                                                const char bytes[maxBytesCharacter + 1] {
                                                        static_cast<char>(leadByte),
                                                        cbView.CharAt(pos + indexDocument + 1)
                                                };
                                                char folded[maxBytesCharacter * maxFoldingExpansion + 1];
                                                lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
                                                // memcmp may examine lenFlat bytes in both arguments so assert it doesn't read past end of searchThing
                                                assert((indexSearch + lenFlat) <= searchThing.size());
                                                // Does folded match the buffer
                                                characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        }
                                        if (!characterMatches) {
                                                break;
                                        }
                                        indexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                if (characterMatches && (indexSearch == lenSearch)) {
                                        if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                // Report the matched document length
                                                *length = indexDocument;
                                                return pos;
                                        }
                                }
                                if (forward) {
                                        pos += widthFirstCharacter;
                                } else {
                                        if (!NextCharacter(pos, increment)) {
                                                break;
                                        }
                                }
                        }
                } else {
                        // Case-insensitive single-byte code page: folded search and document bytes
                        // are compared one for one, so the match length equals lengthFind
                        const Sci::Position endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        std::vector<char> searchThing(lengthFind + 1);
                        pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                bool found = (pos + lengthFind) <= limitPos;
                                for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
                                        const char ch = cbView.CharAt(pos + indexSearch);
                                        const char chTest = searchThing[indexSearch];
                                        if (UTF8IsAscii(ch)) {
                                                found = chTest == MakeLowerCase(ch);
                                        } else {
                                                // Non-ASCII bytes go through the case folder one byte at a time
                                                char folded[2];
                                                pcf->Fold(folded, sizeof(folded), &ch, 1);
                                                found = folded[0] == chTest;
                                        }
                                }
                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                        return pos;
                                }
                                pos += increment;
                        }
                }
        }
        //Platform::DebugPrintf("Not found\n");
        return -1;
}
2366
2367 const char *Document::SubstituteByPosition(const char *text, Sci::Position *length) {
2368         if (regex)
2369                 return regex->SubstituteByPosition(this, text, length);
2370         else
2371                 return nullptr;
2372 }
2373
/**
 * Return the line character index setting reported by the cell buffer.
 */
LineCharacterIndexType Document::LineCharacterIndex() const noexcept {
        return cb.LineCharacterIndex();
}
2377
2378 void Document::AllocateLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2379         return cb.AllocateLineCharacterIndex(lineCharacterIndex);
2380 }
2381
2382 void Document::ReleaseLineCharacterIndex(LineCharacterIndexType lineCharacterIndex) {
2383         return cb.ReleaseLineCharacterIndex(lineCharacterIndex);
2384 }
2385
/**
 * Return the number of lines reported by the cell buffer.
 */
Sci::Line Document::LinesTotal() const noexcept {
        return cb.Lines();
}
2389
/**
 * Forward a line allocation request for the given number of lines to the cell buffer.
 * NOTE(review): presumably a capacity hint for line data - confirm against CellBuffer.
 */
void Document::AllocateLines(Sci::Line lines) {
        cb.AllocateLines(lines);
}
2393
/**
 * Reset the character classification table (charClass) to its defaults.
 * includeWordClass is passed through unchanged to the classifier.
 */
void Document::SetDefaultCharClasses(bool includeWordClass) {
        charClass.SetDefaultCharClasses(includeWordClass);
}
2397
/**
 * Forward chars and newCharClass to the character classification table (charClass).
 * NOTE(review): presumably chars is a NUL-terminated list of characters to assign
 * to newCharClass - confirm against CharClassify.
 */
void Document::SetCharClasses(const unsigned char *chars, CharacterClass newCharClass) {
        charClass.SetCharClasses(chars, newCharClass);
}
2401
/**
 * Query the character classification table (charClass) for the given class, passing
 * buffer through, and return its result.
 * NOTE(review): presumably the result is the count of characters in the class and
 * buffer receives them - confirm against CharClassify.
 */
int Document::GetCharsOfClass(CharacterClass characterClass, unsigned char *buffer) const {
        return charClass.GetCharsOfClass(characterClass, buffer);
}
2405
/**
 * Ask the character category map (charMap) to optimize for countCharacters characters.
 */
void Document::SetCharacterCategoryOptimization(int countCharacters) {
        charMap.Optimize(countCharacters);
}
2409
/**
 * Return the size reported by the character category map (charMap).
 */
int Document::CharacterCategoryOptimization() const noexcept {
        return charMap.Size();
}
2413
/**
 * Set the styling position (endStyled) from which following SetStyleFor and
 * SetStyles calls write styles.
 */
void SCI_METHOD Document::StartStyling(Sci_Position position) {
        endStyled = position;
}
2417
2418 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
2419         if (enteredStyling != 0) {
2420                 return false;
2421         } else {
2422                 enteredStyling++;
2423                 const Sci::Position prevEndStyled = endStyled;
2424                 if (cb.SetStyleFor(endStyled, length, style)) {
2425                         const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User,
2426                                            prevEndStyled, length);
2427                         NotifyModified(mh);
2428                 }
2429                 endStyled += length;
2430                 enteredStyling--;
2431                 return true;
2432         }
2433 }
2434
2435 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
2436         if (enteredStyling != 0) {
2437                 return false;
2438         } else {
2439                 enteredStyling++;
2440                 bool didChange = false;
2441                 Sci::Position startMod = 0;
2442                 Sci::Position endMod = 0;
2443                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
2444                         PLATFORM_ASSERT(endStyled < Length());
2445                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
2446                                 if (!didChange) {
2447                                         startMod = endStyled;
2448                                 }
2449                                 didChange = true;
2450                                 endMod = endStyled;
2451                         }
2452                 }
2453                 if (didChange) {
2454                         const DocModification mh(ModificationFlags::ChangeStyle | ModificationFlags::User,
2455                                            startMod, endMod - startMod + 1);
2456                         NotifyModified(mh);
2457                 }
2458                 enteredStyling--;
2459                 return true;
2460         }
2461 }
2462
2463 void Document::EnsureStyledTo(Sci::Position pos) {
2464         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
2465                 IncrementStyleClock();
2466                 if (pli && !pli->UseContainerLexing()) {
2467                         const Sci::Position endStyledTo = LineStartPosition(GetEndStyled());
2468                         pli->Colourise(endStyledTo, pos);
2469                 } else {
2470                         // Ask the watchers to style, and stop as soon as one responds.
2471                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
2472                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
2473                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
2474                         }
2475                 }
2476         }
2477 }
2478
2479 void Document::StyleToAdjustingLineDuration(Sci::Position pos) {
2480         const Sci::Position stylingStart = GetEndStyled();
2481         ElapsedPeriod epStyling;
2482         EnsureStyledTo(pos);
2483         durationStyleOneByte.AddSample(pos - stylingStart, epStyling.Duration());
2484 }
2485
/**
 * Return a non-owning pointer to the current lexer interface, or nullptr when none is set.
 */
LexInterface *Document::GetLexInterface() const noexcept {
        return pli.get();
}
2489
/**
 * Take ownership of pLexInterface as the document's lexer interface, replacing
 * whatever pli held before.
 */
void Document::SetLexInterface(std::unique_ptr<LexInterface> pLexInterface) noexcept {
        pli = std::move(pLexInterface);
}
2493
2494 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
2495         const int statePrevious = States()->SetLineState(line, state, LinesTotal());
2496         if (state != statePrevious) {
2497                 const DocModification mh(ModificationFlags::ChangeLineState, LineStart(line), 0, 0, nullptr,
2498                         static_cast<Sci::Line>(line));
2499                 NotifyModified(mh);
2500         }
2501         return statePrevious;
2502 }
2503
/**
 * Return the line state stored for line, as reported by the line state storage.
 */
int SCI_METHOD Document::GetLineState(Sci_Position line) const {
        return States()->GetLineState(line);
}
2507
/**
 * Return the maximum line state value reported by the line state storage.
 * NOTE(review): presumably the number of lines that have state recorded - confirm against PerLine.
 */
Sci::Line Document::GetMaxLineState() const noexcept {
        return States()->GetMaxLineState();
}
2511
2512 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
2513         const DocModification mh(ModificationFlags::LexerState, start,
2514                 end-start, 0, nullptr, 0);
2515         NotifyModified(mh);
2516 }
2517
2518 StyledText Document::MarginStyledText(Sci::Line line) const noexcept {
2519         const LineAnnotation *pla = Margins();
2520         return StyledText(pla->Length(line), pla->Text(line),
2521                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2522 }
2523
2524 void Document::MarginSetText(Sci::Line line, const char *text) {
2525         Margins()->SetText(line, text);
2526         const DocModification mh(ModificationFlags::ChangeMargin, LineStart(line),
2527                 0, 0, nullptr, line);
2528         NotifyModified(mh);
2529 }
2530
2531 void Document::MarginSetStyle(Sci::Line line, int style) {
2532         Margins()->SetStyle(line, style);
2533         NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line),
2534                 0, 0, nullptr, line));
2535 }
2536
2537 void Document::MarginSetStyles(Sci::Line line, const unsigned char *styles) {
2538         Margins()->SetStyles(line, styles);
2539         NotifyModified(DocModification(ModificationFlags::ChangeMargin, LineStart(line),
2540                 0, 0, nullptr, line));
2541 }
2542
2543 void Document::MarginClearAll() {
2544         const Sci::Line maxEditorLine = LinesTotal();
2545         for (Sci::Line l=0; l<maxEditorLine; l++)
2546                 MarginSetText(l, nullptr);
2547         // Free remaining data
2548         Margins()->ClearAll();
2549 }
2550
2551 StyledText Document::AnnotationStyledText(Sci::Line line) const noexcept {
2552         const LineAnnotation *pla = Annotations();
2553         return StyledText(pla->Length(line), pla->Text(line),
2554                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2555 }
2556
2557 void Document::AnnotationSetText(Sci::Line line, const char *text) {
2558         if (line >= 0 && line < LinesTotal()) {
2559                 const Sci::Line linesBefore = AnnotationLines(line);
2560                 Annotations()->SetText(line, text);
2561                 const int linesAfter = AnnotationLines(line);
2562                 DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2563                         0, 0, nullptr, line);
2564                 mh.annotationLinesAdded = linesAfter - linesBefore;
2565                 NotifyModified(mh);
2566         }
2567 }
2568
2569 void Document::AnnotationSetStyle(Sci::Line line, int style) {
2570         if (line >= 0 && line < LinesTotal()) {
2571                 Annotations()->SetStyle(line, style);
2572                 const DocModification mh(ModificationFlags::ChangeAnnotation, LineStart(line),
2573                         0, 0, nullptr, line);
2574                 NotifyModified(mh);
2575         }
2576 }
2577
2578 void Document::AnnotationSetStyles(Sci::Line line, const unsigned char *styles) {
2579         if (line >= 0 && line < LinesTotal()) {
2580                 Annotations()->SetStyles(line, styles);
2581         }
2582 }
2583
/**
 * Return the number of annotation lines recorded for line, as reported by the
 * annotation storage.
 */
int Document::AnnotationLines(Sci::Line line) const noexcept {
        return Annotations()->Lines(line);
}
2587
2588 void Document::AnnotationClearAll() {
2589         if (Annotations()->Empty()) {
2590                 return;
2591         }
2592         const Sci::Line maxEditorLine = LinesTotal();
2593         for (Sci::Line l=0; l<maxEditorLine; l++)
2594                 AnnotationSetText(l, nullptr);
2595         // Free remaining data
2596         Annotations()->ClearAll();
2597 }
2598
2599 StyledText Document::EOLAnnotationStyledText(Sci::Line line) const noexcept {
2600         const LineAnnotation *pla = EOLAnnotations();
2601         return StyledText(pla->Length(line), pla->Text(line),
2602                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
2603 }
2604
2605 void Document::EOLAnnotationSetText(Sci::Line line, const char *text) {
2606         if (line >= 0 && line < LinesTotal()) {
2607                 EOLAnnotations()->SetText(line, text);
2608                 const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2609                         0, 0, nullptr, line);
2610                 NotifyModified(mh);
2611         }
2612 }
2613
2614 void Document::EOLAnnotationSetStyle(Sci::Line line, int style) {
2615         if (line >= 0 && line < LinesTotal()) {
2616                 EOLAnnotations()->SetStyle(line, style);
2617                 const DocModification mh(ModificationFlags::ChangeEOLAnnotation, LineStart(line),
2618                         0, 0, nullptr, line);
2619                 NotifyModified(mh);
2620         }
2621 }
2622
2623 void Document::EOLAnnotationClearAll() {
2624         if (EOLAnnotations()->Empty()) {
2625                 return;
2626         }
2627         const Sci::Line maxEditorLine = LinesTotal();
2628         for (Sci::Line l=0; l<maxEditorLine; l++)
2629                 EOLAnnotationSetText(l, nullptr);
2630         // Free remaining data
2631         EOLAnnotations()->ClearAll();
2632 }
2633
/**
 * Advance styleClock by one, reducing the result modulo 0x100000 so the
 * counter wraps back to 0 instead of growing without bound.
 */
void Document::IncrementStyleClock() noexcept {
        styleClock = (styleClock + 1) % 0x100000;
}
2637
/**
 * Forward @a indicator to the decoration list as its current indicator.
 * NOTE(review): presumably this selects the indicator that a following
 * DecorationFillRange call fills - confirm against the decoration list.
 */
void SCI_METHOD Document::DecorationSetCurrentIndicator(int indicator) {
        decorations->SetCurrentIndicator(indicator);
}
2641
2642 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2643         const FillResult<Sci::Position> fr = decorations->FillRange(
2644                 position, value, fillLength);
2645         if (fr.changed) {
2646                 const DocModification mh(ModificationFlags::ChangeIndicator | ModificationFlags::User,
2647                                                         fr.position, fr.fillLength);
2648                 NotifyModified(mh);
2649         }
2650 }
2651
2652 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2653         const WatcherWithUserData wwud(watcher, userData);
2654         std::vector<WatcherWithUserData>::iterator it =
2655                 std::find(watchers.begin(), watchers.end(), wwud);
2656         if (it != watchers.end())
2657                 return false;
2658         watchers.push_back(wwud);
2659         return true;
2660 }
2661
2662 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) noexcept {
2663         try {
2664                 // This can never fail as WatcherWithUserData constructor and == are noexcept
2665                 // but std::find is not noexcept.
2666                 std::vector<WatcherWithUserData>::iterator it =
2667                         std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2668                 if (it != watchers.end()) {
2669                         watchers.erase(it);
2670                         return true;
2671                 }
2672         } catch (...) {
2673                 // Ignore any exception
2674         }
2675         return false;
2676 }
2677
2678 void Document::NotifyModifyAttempt() {
2679         for (const WatcherWithUserData &watcher : watchers) {
2680                 watcher.watcher->NotifyModifyAttempt(this, watcher.userData);
2681         }
2682 }
2683
2684 void Document::NotifySavePoint(bool atSavePoint) {
2685         for (const WatcherWithUserData &watcher : watchers) {
2686                 watcher.watcher->NotifySavePoint(this, watcher.userData, atSavePoint);
2687         }
2688 }
2689
2690 void Document::NotifyModified(DocModification mh) {
2691         if (FlagSet(mh.modificationType, ModificationFlags::InsertText)) {
2692                 decorations->InsertSpace(mh.position, mh.length);
2693         } else if (FlagSet(mh.modificationType, ModificationFlags::DeleteText)) {
2694                 decorations->DeleteRange(mh.position, mh.length);
2695         }
2696         for (const WatcherWithUserData &watcher : watchers) {
2697                 watcher.watcher->NotifyModified(this, mh, watcher.userData);
2698         }
2699 }
2700
// A word part separator is a character that is classified as a word character
// by WordCharacterClass and is also punctuation according to IsPunctuation.
// NOTE(review): '_' is the likely example when it is in the word character set - confirm.
bool Document::IsWordPartSeparator(unsigned int ch) const {
        return (WordCharacterClass(ch) == CharacterClass::word) && IsPunctuation(ch);
}
2704
/**
 * Move backwards from @a pos to the start of the word part that precedes it.
 * The character before @a pos is examined, any run of word part separators is
 * skipped, and then the scan moves back over a run of characters of the same
 * kind (lower case, upper case, digit, punctuation, space or non-ASCII).
 * @return The position reached; @a pos is returned unchanged when it is not positive.
 */
Sci::Position Document::WordPartLeft(Sci::Position pos) const {
        if (pos > 0) {
                // Step onto the character before the starting position.
                pos -= CharacterBefore(pos).widthBytes;
                CharacterExtracted ceStart = CharacterAfter(pos);
                // Skip backwards over a run of word part separators.
                if (IsWordPartSeparator(ceStart.character)) {
                        while (pos > 0 && IsWordPartSeparator(CharacterAfter(pos).character)) {
                                pos -= CharacterBefore(pos).widthBytes;
                        }
                }
                if (pos > 0) {
                        // ceStart decides the kind of run; pos moves one character further back
                        // so each loop below tests the characters preceding ceStart.
                        ceStart = CharacterAfter(pos);
                        pos -= CharacterBefore(pos).widthBytes;
                        // In each branch the loop may stop on a character that is not part of
                        // the run, in which case pos steps forward again onto the run start.
                        if (IsLowerCase(ceStart.character)) {
                                while (pos > 0 && IsLowerCase(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                // An upper case character directly before the lower case run is
                                // kept as the first character of the part.
                                if (!IsUpperCase(CharacterAfter(pos).character) && !IsLowerCase(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (IsUpperCase(ceStart.character)) {
                                while (pos > 0 && IsUpperCase(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!IsUpperCase(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (IsADigit(ceStart.character)) {
                                while (pos > 0 && IsADigit(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!IsADigit(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (IsPunctuation(ceStart.character)) {
                                while (pos > 0 && IsPunctuation(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!IsPunctuation(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (IsASpace(ceStart.character)) {
                                while (pos > 0 && IsASpace(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (!IsASpace(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else if (!IsASCII(ceStart.character)) {
                                while (pos > 0 && !IsASCII(CharacterAfter(pos).character))
                                        pos -= CharacterBefore(pos).widthBytes;
                                if (IsASCII(CharacterAfter(pos).character))
                                        pos += CharacterAfter(pos).widthBytes;
                        } else {
                                // No recognised kind: undo the extra step back so the part is
                                // just the character held in ceStart.
                                pos += CharacterAfter(pos).widthBytes;
                        }
                }
        }
        return pos;
}
2754
/**
 * Move forwards from @a pos past one word part and return the position reached.
 * Leading word part separators are skipped first, then a run of characters of
 * the same kind as the first remaining character is consumed. The scan never
 * moves past the document length inside the loops.
 */
Sci::Position Document::WordPartRight(Sci::Position pos) const {
        CharacterExtracted ceStart = CharacterAfter(pos);
        const Sci::Position length = LengthNoExcept();
        // Skip a run of word part separators and classify from the character after it.
        if (IsWordPartSeparator(ceStart.character)) {
                while (pos < length && IsWordPartSeparator(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
                ceStart = CharacterAfter(pos);
        }
        // Non-ASCII is tested first so the remaining tests only see ASCII characters.
        if (!IsASCII(ceStart.character)) {
                while (pos < length && !IsASCII(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (IsLowerCase(ceStart.character)) {
                while (pos < length && IsLowerCase(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (IsUpperCase(ceStart.character)) {
                if (IsLowerCase(CharacterAfter(pos + ceStart.widthBytes).character)) {
                        // Capital followed by lower case: take the capital and the lower case run.
                        pos += CharacterAfter(pos).widthBytes;
                        while (pos < length && IsLowerCase(CharacterAfter(pos).character))
                                pos += CharacterAfter(pos).widthBytes;
                } else {
                        // Otherwise take the whole run of capitals.
                        while (pos < length && IsUpperCase(CharacterAfter(pos).character))
                                pos += CharacterAfter(pos).widthBytes;
                }
                // When the run of capitals is followed by lower case, the last capital
                // is given back so that it starts the next part.
                if (IsLowerCase(CharacterAfter(pos).character) && IsUpperCase(CharacterBefore(pos).character))
                        pos -= CharacterBefore(pos).widthBytes;
        } else if (IsADigit(ceStart.character)) {
                while (pos < length && IsADigit(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (IsPunctuation(ceStart.character)) {
                while (pos < length && IsPunctuation(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else if (IsASpace(ceStart.character)) {
                while (pos < length && IsASpace(CharacterAfter(pos).character))
                        pos += CharacterAfter(pos).widthBytes;
        } else {
                // No recognised kind: advance over the single character.
                pos += CharacterAfter(pos).widthBytes;
        }
        return pos;
}
2794
2795 Sci::Position Document::ExtendStyleRange(Sci::Position pos, int delta, bool singleLine) noexcept {
2796         const char sStart = cb.StyleAt(pos);
2797         if (delta < 0) {
2798                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
2799                         pos--;
2800                 pos++;
2801         } else {
2802                 while (pos < (LengthNoExcept()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsEOLCharacter(cb.CharAt(pos))))
2803                         pos++;
2804         }
2805         return pos;
2806 }
2807
// Return the brace that pairs with ch for the brace characters ()[]{}<>,
// or '\0' when ch is not one of them.
static char BraceOpposite(char ch) noexcept {
        // Each opening brace is immediately followed by its closing brace.
        constexpr char bracePairs[] = "()[]{}<>";
        constexpr int braceCount = 8;
        for (int open = 0; open < braceCount; open += 2) {
                const int close = open + 1;
                if (ch == bracePairs[open]) {
                        return bracePairs[close];
                }
                if (ch == bracePairs[close]) {
                        return bracePairs[open];
                }
        }
        return '\0';
}
2830
2831 // TODO: should be able to extend styled region to find matching brace
2832 Sci::Position Document::BraceMatch(Sci::Position position, Sci::Position /*maxReStyle*/, Sci::Position startPos, bool useStartPos) noexcept {
2833         const char chBrace = CharAt(position);
2834         const char chSeek = BraceOpposite(chBrace);
2835         if (chSeek == '\0')
2836                 return - 1;
2837         const int styBrace = StyleIndexAt(position);
2838         int direction = -1;
2839         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2840                 direction = 1;
2841         int depth = 1;
2842         position = useStartPos ? startPos : NextPosition(position, direction);
2843         while ((position >= 0) && (position < LengthNoExcept())) {
2844                 const char chAtPos = CharAt(position);
2845                 const int styAtPos = StyleIndexAt(position);
2846                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2847                         if (chAtPos == chBrace)
2848                                 depth++;
2849                         if (chAtPos == chSeek)
2850                                 depth--;
2851                         if (depth == 0)
2852                                 return position;
2853                 }
2854                 const Sci::Position positionBeforeMove = position;
2855                 position = NextPosition(position, direction);
2856                 if (position == positionBeforeMove)
2857                         break;
2858         }
2859         return - 1;
2860 }
2861
/**
 * Implementation of RegexSearchBase for the default built-in regular expression engine.
 * FindText hands the search to the C++11 <regex> implementation when
 * FindOption::Cxx11RegEx is set (and NO_CXX11_REGEX is not defined); otherwise
 * the pattern is compiled and run with the RESearch member.
 */
class BuiltinRegex : public RegexSearchBase {
public:
        // The character classification table is passed to the RESearch member.
        explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}

        // Search for the regular expression s between minPos and maxPos.
        // The word and wordStart arguments are not used by this implementation.
        // On entry *length holds the length of s.
        // NOTE(review): presumably *length receives the length of the match on success - the
        // C++11 path does this; confirm for the RESearch path.
        Sci::Position FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
                        bool caseSensitive, bool word, bool wordStart, FindOption flags,
                        Sci::Position *length) override;

        // NOTE(review): presumably expands references to the sub-matches of the last search
        // inside text - confirm against the definition.
        const char *SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) override;

private:
        // Built-in engine; its bopat / eopat arrays also receive the match positions
        // found by the C++11 <regex> path.
        RESearch search;
        // NOTE(review): presumably backs the pointer returned by SubstituteByPosition - confirm.
        std::string substituted;
};
2879
2880 namespace {
2881
2882 /**
2883 * RESearchRange keeps track of search range.
2884 */
2885 class RESearchRange {
2886 public:
2887         const Document *doc;
2888         int increment;
2889         Sci::Position startPos;
2890         Sci::Position endPos;
2891         Sci::Line lineRangeStart;
2892         Sci::Line lineRangeEnd;
2893         Sci::Line lineRangeBreak;
2894         RESearchRange(const Document *doc_, Sci::Position minPos, Sci::Position maxPos) noexcept : doc(doc_) {
2895                 increment = (minPos <= maxPos) ? 1 : -1;
2896
2897                 // Range endpoints should not be inside DBCS characters or between a CR and LF,
2898                 // but just in case, move them.
2899                 startPos = doc->MovePositionOutsideChar(minPos, 1, true);
2900                 endPos = doc->MovePositionOutsideChar(maxPos, 1, true);
2901
2902                 lineRangeStart = doc->SciLineFromPosition(startPos);
2903                 lineRangeEnd = doc->SciLineFromPosition(endPos);
2904                 lineRangeBreak = lineRangeEnd + increment;
2905         }
2906         Range LineRange(Sci::Line line, Sci::Position lineStartPos, Sci::Position lineEndPos) const noexcept {
2907                 Range range(lineStartPos, lineEndPos);
2908                 if (increment == 1) {
2909                         if (line == lineRangeStart)
2910                                 range.start = startPos;
2911                         if (line == lineRangeEnd)
2912                                 range.end = endPos;
2913                 } else {
2914                         if (line == lineRangeEnd)
2915                                 range.start = endPos;
2916                         if (line == lineRangeStart)
2917                                 range.end = startPos;
2918                 }
2919                 return range;
2920         }
2921 };
2922
2923 // Define a way for the Regular Expression code to access the document
2924 class DocumentIndexer final : public CharacterIndexer {
2925         Document *pdoc;
2926         Sci::Position end;
2927 public:
2928         DocumentIndexer(Document *pdoc_, Sci::Position end_) noexcept :
2929                 pdoc(pdoc_), end(end_) {
2930         }
2931
2932         char CharAt(Sci::Position index) const noexcept override {
2933                 if (index < 0 || index >= end)
2934                         return 0;
2935                 else
2936                         return pdoc->CharAt(index);
2937         }
2938         Sci::Position MovePositionOutsideChar(Sci::Position pos, Sci::Position moveDir) const noexcept override {
2939                 return pdoc->MovePositionOutsideChar(pos, moveDir, false);
2940         }
2941 };
2942
2943 #ifndef NO_CXX11_REGEX
2944
2945 class ByteIterator {
2946 public:
2947         using iterator_category = std::bidirectional_iterator_tag;
2948         using value_type = char;
2949         using difference_type = ptrdiff_t;
2950         using pointer = char*;
2951         using reference = char&;
2952
2953         const Document *doc;
2954         Sci::Position position;
2955
2956         explicit ByteIterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
2957                 doc(doc_), position(position_) {
2958         }
2959         char operator*() const noexcept {
2960                 return doc->CharAt(position);
2961         }
2962         ByteIterator &operator++() noexcept {
2963                 position++;
2964                 return *this;
2965         }
2966         ByteIterator operator++(int) noexcept {
2967                 ByteIterator retVal(*this);
2968                 position++;
2969                 return retVal;
2970         }
2971         ByteIterator &operator--() noexcept {
2972                 position--;
2973                 return *this;
2974         }
2975         bool operator==(const ByteIterator &other) const noexcept {
2976                 return doc == other.doc && position == other.position;
2977         }
2978         bool operator!=(const ByteIterator &other) const noexcept {
2979                 return doc != other.doc || position != other.position;
2980         }
2981         Sci::Position Pos() const noexcept {
2982                 return position;
2983         }
2984         Sci::Position PosRoundUp() const noexcept {
2985                 return position;
2986         }
2987 };
2988
// WCHAR_T_IS_16 selects which UTF8Iterator implementation is compiled:
// 1 when wchar_t is treated as 16 bits wide, 0 when it is treated as 32 bits wide.
// On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
// It would be better to use sizeof(wchar_t) or similar to differentiate
// but it is easier for now to hard-code platforms, so only _WIN32 is tested.
// C++11 has char16_t and char32_t but neither Clang nor Visual C++
// appear to allow specializing basic_regex over these.

#ifdef _WIN32
#define WCHAR_T_IS_16 1
#else
#define WCHAR_T_IS_16 0
#endif
3000
3001 #if WCHAR_T_IS_16
3002
3003 // On Windows, report non-BMP characters as 2 separate surrogates as that
3004 // matches wregex since it is based on wchar_t.
3005 class UTF8Iterator {
3006         // These 3 fields determine the iterator position and are used for comparisons
3007         const Document *doc;
3008         Sci::Position position;
3009         size_t characterIndex;
3010         // Remaining fields are derived from the determining fields so are excluded in comparisons
3011         unsigned int lenBytes;
3012         size_t lenCharacters;
3013         wchar_t buffered[2];
3014 public:
3015         using iterator_category = std::bidirectional_iterator_tag;
3016         using value_type = wchar_t;
3017         using difference_type = ptrdiff_t;
3018         using pointer = wchar_t*;
3019         using reference = wchar_t&;
3020
3021         explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
3022                 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0), buffered{} {
3023                 buffered[0] = 0;
3024                 buffered[1] = 0;
3025                 if (doc) {
3026                         ReadCharacter();
3027                 }
3028         }
3029         wchar_t operator*() const noexcept {
3030                 assert(lenCharacters != 0);
3031                 return buffered[characterIndex];
3032         }
3033         UTF8Iterator &operator++() noexcept {
3034                 if ((characterIndex + 1) < (lenCharacters)) {
3035                         characterIndex++;
3036                 } else {
3037                         position += lenBytes;
3038                         ReadCharacter();
3039                         characterIndex = 0;
3040                 }
3041                 return *this;
3042         }
3043         UTF8Iterator operator++(int) noexcept {
3044                 UTF8Iterator retVal(*this);
3045                 if ((characterIndex + 1) < (lenCharacters)) {
3046                         characterIndex++;
3047                 } else {
3048                         position += lenBytes;
3049                         ReadCharacter();
3050                         characterIndex = 0;
3051                 }
3052                 return retVal;
3053         }
3054         UTF8Iterator &operator--() noexcept {
3055                 if (characterIndex) {
3056                         characterIndex--;
3057                 } else {
3058                         position = doc->NextPosition(position, -1);
3059                         ReadCharacter();
3060                         characterIndex = lenCharacters - 1;
3061                 }
3062                 return *this;
3063         }
3064         bool operator==(const UTF8Iterator &other) const noexcept {
3065                 // Only test the determining fields, not the character widths and values derived from this
3066                 return doc == other.doc &&
3067                         position == other.position &&
3068                         characterIndex == other.characterIndex;
3069         }
3070         bool operator!=(const UTF8Iterator &other) const noexcept {
3071                 // Only test the determining fields, not the character widths and values derived from this
3072                 return doc != other.doc ||
3073                         position != other.position ||
3074                         characterIndex != other.characterIndex;
3075         }
3076         Sci::Position Pos() const noexcept {
3077                 return position;
3078         }
3079         Sci::Position PosRoundUp() const noexcept {
3080                 if (characterIndex)
3081                         return position + lenBytes;     // Force to end of character
3082                 else
3083                         return position;
3084         }
3085 private:
3086         void ReadCharacter() noexcept {
3087                 const CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3088                 lenBytes = charExtracted.widthBytes;
3089                 if (charExtracted.character == unicodeReplacementChar) {
3090                         lenCharacters = 1;
3091                         buffered[0] = static_cast<wchar_t>(charExtracted.character);
3092                 } else {
3093                         lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
3094                 }
3095         }
3096 };
3097
3098 #else
3099
3100 // On Unix, report non-BMP characters as single characters
3101
3102 class UTF8Iterator {
3103         const Document *doc;
3104         Sci::Position position;
3105 public:
3106         using iterator_category = std::bidirectional_iterator_tag;
3107         using value_type = wchar_t;
3108         using difference_type = ptrdiff_t;
3109         using pointer = wchar_t*;
3110         using reference = wchar_t&;
3111
3112         explicit UTF8Iterator(const Document *doc_=nullptr, Sci::Position position_=0) noexcept :
3113                 doc(doc_), position(position_) {
3114         }
3115         wchar_t operator*() const noexcept {
3116                 const CharacterExtracted charExtracted = doc->ExtractCharacter(position);
3117                 return charExtracted.character;
3118         }
3119         UTF8Iterator &operator++() noexcept {
3120                 position = doc->NextPosition(position, 1);
3121                 return *this;
3122         }
3123         UTF8Iterator operator++(int) noexcept {
3124                 UTF8Iterator retVal(*this);
3125                 position = doc->NextPosition(position, 1);
3126                 return retVal;
3127         }
3128         UTF8Iterator &operator--() noexcept {
3129                 position = doc->NextPosition(position, -1);
3130                 return *this;
3131         }
3132         bool operator==(const UTF8Iterator &other) const noexcept {
3133                 return doc == other.doc && position == other.position;
3134         }
3135         bool operator!=(const UTF8Iterator &other) const noexcept {
3136                 return doc != other.doc || position != other.position;
3137         }
3138         Sci::Position Pos() const noexcept {
3139                 return position;
3140         }
3141         Sci::Position PosRoundUp() const noexcept {
3142                 return position;
3143         }
3144 };
3145
3146 #endif
3147
3148 std::regex_constants::match_flag_type MatchFlags(const Document *doc, Sci::Position startPos, Sci::Position endPos, Sci::Position lineStartPos, Sci::Position lineEndPos) {
3149         std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
3150         if (startPos != lineStartPos) {
3151 #ifdef _LIBCPP_VERSION
3152                 flagsMatch |= std::regex_constants::match_not_bol;
3153                 if (!doc->IsWordStartAt(startPos)) {
3154                         flagsMatch |= std::regex_constants::match_not_bow;
3155                 }
3156 #else
3157                 flagsMatch |= std::regex_constants::match_prev_avail;
3158 #endif
3159         }
3160         if (endPos != lineEndPos) {
3161                 flagsMatch |= std::regex_constants::match_not_eol;
3162                 if (!doc->IsWordEndAt(endPos)) {
3163                         flagsMatch |= std::regex_constants::match_not_eow;
3164                 }
3165         }
3166         return flagsMatch;
3167 }
3168
3169 template<typename Iterator, typename Regex>
3170 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
3171         std::match_results<Iterator> match;
3172
3173         // MSVC and libc++ have problems with ^ and $ matching line ends inside a range.
3174         // CRLF line ends are also a problem as ^ and $ only treat LF as a line end.
3175         // The std::regex::multiline option was added to C++17 to improve behaviour but
3176         // has not been implemented by compiler runtimes with MSVC always in multiline
3177         // mode and libc++ and libstdc++ always in single-line mode.
3178         // If multiline regex worked well then the line by line iteration could be removed
3179         // for the forwards case and replaced with the following:
3180 #ifdef REGEX_MULTILINE
3181         const Sci::Position lineStartPos = doc->LineStart(resr.lineRangeStart);
3182         const Sci::Position lineEndPos = doc->LineEnd(resr.lineRangeEnd);
3183         Iterator itStart(doc, resr.startPos);
3184         Iterator itEnd(doc, resr.endPos);
3185         const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, resr.startPos, resr.endPos, lineStartPos, lineEndPos);
3186         const bool matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
3187 #else
3188         // Line by line.
3189         bool matched = false;
3190         for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3191                 const Sci::Position lineStartPos = doc->LineStart(line);
3192                 const Sci::Position lineEndPos = doc->LineEnd(line);
3193                 const Range lineRange = resr.LineRange(line, lineStartPos, lineEndPos);
3194                 Iterator itStart(doc, lineRange.start);
3195                 Iterator itEnd(doc, lineRange.end);
3196                 const std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end, lineStartPos, lineEndPos);
3197                 std::regex_iterator<Iterator> it(itStart, itEnd, regexp, flagsMatch);
3198                 for (const std::regex_iterator<Iterator> last; it != last; ++it) {
3199                         match = *it;
3200                         matched = true;
3201                         if (resr.increment > 0) {
3202                                 break;
3203                         }
3204                 }
3205                 if (matched) {
3206                         break;
3207                 }
3208         }
3209 #endif
3210         if (matched) {
3211                 for (size_t co = 0; co < match.size() && co < RESearch::MAXTAG; co++) {
3212                         search.bopat[co] = match[co].first.Pos();
3213                         search.eopat[co] = match[co].second.PosRoundUp();
3214                 }
3215         }
3216         return matched;
3217 }
3218
3219 Sci::Position Cxx11RegexFindText(const Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3220         bool caseSensitive, Sci::Position *length, RESearch &search) {
3221         const RESearchRange resr(doc, minPos, maxPos);
3222         try {
3223                 //ElapsedPeriod ep;
3224                 std::regex::flag_type flagsRe = std::regex::ECMAScript;
3225                 // Flags that appear to have no effect:
3226                 // | std::regex::collate | std::regex::extended;
3227                 if (!caseSensitive)
3228                         flagsRe = flagsRe | std::regex::icase;
3229
3230 #if defined(REGEX_MULTILINE) && !defined(_MSC_VER)
3231                 flagsRe = flagsRe | std::regex::multiline;
3232 #endif
3233
3234                 // Clear the RESearch so can fill in matches
3235                 search.Clear();
3236
3237                 bool matched = false;
3238                 if (CpUtf8 == doc->dbcsCodePage) {
3239                         const std::wstring ws = WStringFromUTF8(s);
3240                         std::wregex regexp;
3241                         regexp.assign(ws, flagsRe);
3242                         matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
3243                 } else {
3244                         std::regex regexp;
3245                         regexp.assign(s, flagsRe);
3246                         matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
3247                 }
3248
3249                 Sci::Position posMatch = -1;
3250                 if (matched) {
3251                         posMatch = search.bopat[0];
3252                         *length = search.eopat[0] - search.bopat[0];
3253                 }
3254                 // Example - search in doc/ScintillaHistory.html for
3255                 // [[:upper:]]eta[[:space:]]
3256                 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
3257                 //const double durSearch = ep.Duration(true);
3258                 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
3259                 return posMatch;
3260         } catch (std::regex_error &) {
3261                 // Failed to create regular expression
3262                 throw RegexError();
3263         } catch (...) {
3264                 // Failed in some other way
3265                 return -1;
3266         }
3267 }
3268
3269 #endif
3270
3271 }
3272
3273 Sci::Position BuiltinRegex::FindText(Document *doc, Sci::Position minPos, Sci::Position maxPos, const char *s,
3274                         bool caseSensitive, bool, bool, FindOption flags,
3275                         Sci::Position *length) {
3276
3277 #ifndef NO_CXX11_REGEX
3278         if (FlagSet(flags, FindOption::Cxx11RegEx)) {
3279                         return Cxx11RegexFindText(doc, minPos, maxPos, s,
3280                         caseSensitive, length, search);
3281         }
3282 #endif
3283
3284         const RESearchRange resr(doc, minPos, maxPos);
3285
3286         const bool posix = FlagSet(flags, FindOption::Posix);
3287
3288         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
3289         if (errmsg) {
3290                 return -1;
3291         }
3292         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
3293         // Replace first '.' with '-' in each property file variable reference:
3294         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
3295         //     Replace: $(\1-\2)
3296         Sci::Position pos = -1;
3297         Sci::Position lenRet = 0;
3298         const bool searchforLineStart = s[0] == '^';
3299         const char searchEnd = s[*length - 1];
3300         const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
3301         const bool searchforLineEnd = (searchEnd == '$') && (searchEndPrev != '\\');
3302         for (Sci::Line line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
3303                 const Sci::Position lineStartPos = doc->LineStart(line);
3304                 const Sci::Position lineEndPos = doc->LineEnd(line);
3305                 Sci::Position startOfLine = lineStartPos;
3306                 Sci::Position endOfLine = lineEndPos;
3307                 if (resr.increment == 1) {
3308                         if (line == resr.lineRangeStart) {
3309                                 if ((resr.startPos != startOfLine) && searchforLineStart)
3310                                         continue;       // Can't match start of line if start position after start of line
3311                                 startOfLine = resr.startPos;
3312                         }
3313                         if (line == resr.lineRangeEnd) {
3314                                 if ((resr.endPos != endOfLine) && searchforLineEnd)
3315                                         continue;       // Can't match end of line if end position before end of line
3316                                 endOfLine = resr.endPos;
3317                         }
3318                 } else {
3319                         if (line == resr.lineRangeEnd) {
3320                                 if ((resr.endPos != startOfLine) && searchforLineStart)
3321                                         continue;       // Can't match start of line if end position after start of line
3322                                 startOfLine = resr.endPos;
3323                         }
3324                         if (line == resr.lineRangeStart) {
3325                                 if ((resr.startPos != endOfLine) && searchforLineEnd)
3326                                         continue;       // Can't match end of line if start position before end of line
3327                                 endOfLine = resr.startPos;
3328                         }
3329                 }
3330
3331                 const DocumentIndexer di(doc, endOfLine);
3332                 search.SetLineRange(lineStartPos, lineEndPos);
3333                 int success = search.Execute(di, startOfLine, endOfLine);
3334                 if (success) {
3335                         Sci::Position endPos = search.eopat[0];
3336                         // There can be only one start of a line, so no need to look for last match in line
3337                         if ((resr.increment == -1) && !searchforLineStart) {
3338                                 // Check for the last match on this line.
3339                                 while (success && (endPos < endOfLine)) {
3340                                         const RESearch::MatchPositions bopat = search.bopat;
3341                                         const RESearch::MatchPositions eopat = search.eopat;
3342                                         pos = endPos;
3343                                         if (pos == bopat[0]) {
3344                                                 // empty match
3345                                                 pos = doc->NextPosition(pos, 1);
3346                                         }
3347                                         success = search.Execute(di, pos, endOfLine);
3348                                         if (success) {
3349                                                 endPos = search.eopat[0];
3350                                         } else {
3351                                                 search.bopat = bopat;
3352                                                 search.eopat = eopat;
3353                                         }
3354                                 }
3355                         }
3356                         pos = search.bopat[0];
3357                         lenRet = endPos - pos;
3358                         break;
3359                 }
3360         }
3361         *length = lenRet;
3362         return pos;
3363 }
3364
3365 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, Sci::Position *length) {
3366         substituted.clear();
3367         for (Sci::Position j = 0; j < *length; j++) {
3368                 if (text[j] == '\\') {
3369                         const char chNext = text[++j];
3370                         if (chNext >= '0' && chNext <= '9') {
3371                                 const unsigned int patNum = chNext - '0';
3372                                 const Sci::Position startPos = search.bopat[patNum];
3373                                 const Sci::Position len = search.eopat[patNum] - startPos;
3374                                 if (len > 0) {  // Will be null if try for a match that did not occur
3375                                         const size_t size = substituted.length();
3376                                         substituted.resize(size + len);
3377                                         doc->GetCharRange(substituted.data() + size, startPos, len);
3378                                 }
3379                         } else {
3380                                 switch (chNext) {
3381                                 case 'a':
3382                                         substituted.push_back('\a');
3383                                         break;
3384                                 case 'b':
3385                                         substituted.push_back('\b');
3386                                         break;
3387                                 case 'f':
3388                                         substituted.push_back('\f');
3389                                         break;
3390                                 case 'n':
3391                                         substituted.push_back('\n');
3392                                         break;
3393                                 case 'r':
3394                                         substituted.push_back('\r');
3395                                         break;
3396                                 case 't':
3397                                         substituted.push_back('\t');
3398                                         break;
3399                                 case 'v':
3400                                         substituted.push_back('\v');
3401                                         break;
3402                                 case '\\':
3403                                         substituted.push_back('\\');
3404                                         break;
3405                                 default:
3406                                         substituted.push_back('\\');
3407                                         j--;
3408                                 }
3409                         }
3410                 } else {
3411                         substituted.push_back(text[j]);
3412                 }
3413         }
3414         *length = substituted.length();
3415         return substituted.c_str();
3416 }
3417
3418 #ifndef SCI_OWNREGEX
3419
3420 RegexSearchBase *Scintilla::Internal::CreateRegexSearch(CharClassify *charClassTable) {
3421         return new BuiltinRegex(charClassTable);
3422 }
3423
3424 #endif