scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <assert.h>
  12 #include <ctype.h>
  13
  14 #include <stdexcept>
  15 #include <string>
  16 #include <vector>
  17 #include <algorithm>
  18
  19 #ifdef CXX11_REGEX
  20 #include <regex>
  21 #endif
  22
  23 #include "Platform.h"
  24
  25 #include "ILexer.h"
  26 #include "Scintilla.h"
  27
  28 #include "CharacterSet.h"
  29 #include "Position.h"
  30 #include "SplitVector.h"
  31 #include "Partitioning.h"
  32 #include "RunStyles.h"
  33 #include "CellBuffer.h"
  34 #include "PerLine.h"
  35 #include "CharClassify.h"
  36 #include "Decoration.h"
  37 #include "CaseFolder.h"
  38 #include "Document.h"
  39 #include "RESearch.h"
  40 #include "UniConversion.h"
  41 #include "UnicodeFromUTF8.h"
  42
  43 #ifdef SCI_NAMESPACE
  44 using namespace Scintilla;
  45 #endif
  46
  47 static inline bool IsPunctuation(char ch) {
  48         return IsASCII(ch) && ispunct(ch);
  49 }
  50
  51 void LexInterface::Colourise(int start, int end) {
  52         if (pdoc && instance && !performingStyle) {
  53                 // Protect against reentrance, which may occur, for example, when
  54                 // fold points are discovered while performing styling and the folding
  55                 // code looks for child lines which may trigger styling.
  56                 performingStyle = true;
  57
  58                 int lengthDoc = pdoc->Length();
  59                 if (end == -1)
  60                         end = lengthDoc;
  61                 int len = end - start;
  62
  63                 PLATFORM_ASSERT(len >= 0);
  64                 PLATFORM_ASSERT(start + len <= lengthDoc);
  65
  66                 int styleStart = 0;
  67                 if (start > 0)
  68                         styleStart = pdoc->StyleAt(start - 1);
  69
  70                 if (len > 0) {
  71                         instance->Lex(start, len, styleStart, pdoc);
  72                         instance->Fold(start, len, styleStart, pdoc);
  73                 }
  74
  75                 performingStyle = false;
  76         }
  77 }
  78
  79 int LexInterface::LineEndTypesSupported() {
  80         if (instance) {
  81                 int interfaceVersion = instance->Version();
  82                 if (interfaceVersion >= lvSubStyles) {
  83                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  84                         return ssinstance->LineEndTypesSupported();
  85                 }
  86         }
  87         return 0;
  88 }
  89
// Construct a document with default settings and freshly allocated per-line data.
// The reference count starts at 0; no case folder, regex engine or lexer interface
// is attached yet.
Document::Document() {
        refCount = 0;
        pcf = NULL;
        // The default end of line mode depends on the build platform.
#ifdef _WIN32
        eolMode = SC_EOL_CRLF;
#else
        eolMode = SC_EOL_LF;
#endif
        dbcsCodePage = 0;
        lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
        // Styling state: nothing styled yet.
        endStyled = 0;
        styleClock = 0;
        // Counters that track nested entry into modification, styling and
        // read-only notification code (see their uses elsewhere in this class).
        enteredModification = 0;
        enteredStyling = 0;
        enteredReadOnlyCount = 0;
        insertionSet = false;
        // Tab and indentation defaults.
        tabInChars = 8;
        indentInChars = 0;
        actualIndentInChars = 8;
        useTabs = true;
        tabIndents = true;
        backspaceUnindents = false;
        durationStyleOneLine = 0.00001;

        // Search state: no cached matches and no regex engine created yet.
        matchesValid = false;
        regex = 0;

        UTF8BytesOfLeadInitialise();

        // One store per kind of per-line data. Margin text and annotations both
        // use a LineAnnotation store. These are deleted in the destructor.
        perLineData[ldMarkers] = new LineMarkers();
        perLineData[ldLevels] = new LineLevels();
        perLineData[ldState] = new LineState();
        perLineData[ldMargin] = new LineAnnotation();
        perLineData[ldAnnotation] = new LineAnnotation();

        // Hand this document to the cell buffer as its per-line object.
        // NOTE(review): presumably so line insertions and removals in the buffer
        // are forwarded to InsertLine/RemoveLine - confirm in CellBuffer.
        cb.SetPerLine(this);

        pli = 0;
}
 129
 130 Document::~Document() {
 131         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 132                 it->watcher->NotifyDeleted(this, it->userData);
 133         }
 134         for (int j=0; j<ldSize; j++) {
 135                 delete perLineData[j];
 136                 perLineData[j] = 0;
 137         }
 138         delete regex;
 139         regex = 0;
 140         delete pli;
 141         pli = 0;
 142         delete pcf;
 143         pcf = 0;
 144 }
 145
 146 void Document::Init() {
 147         for (int j=0; j<ldSize; j++) {
 148                 if (perLineData[j])
 149                         perLineData[j]->Init();
 150         }
 151 }
 152
 153 int Document::LineEndTypesSupported() const {
 154         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 155                 return pli->LineEndTypesSupported();
 156         else
 157                 return 0;
 158 }
 159
 160 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 161         if (dbcsCodePage != dbcsCodePage_) {
 162                 dbcsCodePage = dbcsCodePage_;
 163                 SetCaseFolder(NULL);
 164                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 165                 return true;
 166         } else {
 167                 return false;
 168         }
 169 }
 170
 171 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 172         if (lineEndBitSet != lineEndBitSet_) {
 173                 lineEndBitSet = lineEndBitSet_;
 174                 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 175                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 176                         ModifiedAt(0);
 177                         cb.SetLineEndTypes(lineEndBitSetActive);
 178                         return true;
 179                 } else {
 180                         return false;
 181                 }
 182         } else {
 183                 return false;
 184         }
 185 }
 186
 187 void Document::InsertLine(int line) {
 188         for (int j=0; j<ldSize; j++) {
 189                 if (perLineData[j])
 190                         perLineData[j]->InsertLine(line);
 191         }
 192 }
 193
 194 void Document::RemoveLine(int line) {
 195         for (int j=0; j<ldSize; j++) {
 196                 if (perLineData[j])
 197                         perLineData[j]->RemoveLine(line);
 198         }
 199 }
 200
 201 // Increase reference count and return its previous value.
 202 int Document::AddRef() {
 203         return refCount++;
 204 }
 205
 206 // Decrease reference count and return its previous value.
 207 // Delete the document if reference count reaches zero.
 208 int SCI_METHOD Document::Release() {
 209         int curRefCount = --refCount;
 210         if (curRefCount == 0)
 211                 delete this;
 212         return curRefCount;
 213 }
 214
// Mark the current state of the cell buffer as the save point and then
// notify via NotifySavePoint(true) that the document is at a save point.
void Document::SetSavePoint() {
        cb.SetSavePoint();
        NotifySavePoint(true);
}
 219
// Undo every step recorded by the cell buffer's tentative sequence and then call
// cb.TentativeCommit().
// Returns immediately when TentativeActive() is false. The undo work is skipped
// when a modification is already in progress (enteredModification != 0) or the
// buffer is read-only.
// For each step a "before" notification is sent, the step is undone, and an
// "after" notification is sent. A save point notification follows if the save
// point state changed over the whole sequence.
void Document::TentativeUndo() {
        if (!TentativeActive())
                return;
        // NOTE(review): CheckReadOnly() is defined elsewhere; presumably it reports a
        // modification attempt when the buffer is read-only - confirm.
        CheckReadOnly();
        if (enteredModification == 0) {
                // Guard against reentrant modification while notifications are sent.
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        // Set once any step changes the number of lines.
                        bool multiLine = false;
                        int steps = cb.TentativeSteps();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetUndoStep();
                                // "Before" notification: undoing a removal will insert text,
                                // container actions carry their token, and anything else is
                                // announced as a deletion.
                                if (action.at == removeAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
                                } else if (action.at == containerAction) {
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
                                }
                                cb.PerformUndoStep();
                                // Container actions do not change text, so only the others
                                // are reported through ModifiedAt.
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                }

                                int modFlags = SC_PERFORMED_UNDO;
                                // With undo, an insertion action becomes a deletion notification
                                // and a removal action becomes an insertion notification.
                                if (action.at == removeAction) {
                                        modFlags |= SC_MOD_INSERTTEXT;
                                } else if (action.at == insertAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step is flagged as such, and also carries the
                                // multi-line flag if any step in the sequence changed the line count.
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                                                                           linesAdded, action.data));
                        }

                        // Report a save point transition if undoing crossed one.
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);

                        cb.TentativeCommit();
                }
                enteredModification--;
        }
}
 280
// Return the value LineMarkers::MarkValue reports for the given line.
// NOTE(review): presumably a bit mask with one bit per marker number set on the
// line - confirm against LineMarkers.
int Document::GetMark(int line) {
        return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
}
 284
// Delegate to LineMarkers::MarkerNext with the starting line and marker mask.
// NOTE(review): presumably returns the first line at or after lineStart whose
// markers intersect mask - confirm against LineMarkers.
int Document::MarkerNext(int lineStart, int mask) const {
        return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
}
 288
 289 int Document::AddMark(int line, int markerNum) {
 290         if (line >= 0 && line <= LinesTotal()) {
 291                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 292                         AddMark(line, markerNum, LinesTotal());
 293                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 294                 NotifyModified(mh);
 295                 return prev;
 296         } else {
 297                 return 0;
 298         }
 299 }
 300
 301 void Document::AddMarkSet(int line, int valueSet) {
 302         if (line < 0 || line > LinesTotal()) {
 303                 return;
 304         }
 305         unsigned int m = valueSet;
 306         for (int i = 0; m; i++, m >>= 1)
 307                 if (m & 1)
 308                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 309                                 AddMark(line, i, LinesTotal());
 310         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 311         NotifyModified(mh);
 312 }
 313
 314 void Document::DeleteMark(int line, int markerNum) {
 315         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 316         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 317         NotifyModified(mh);
 318 }
 319
 320 void Document::DeleteMarkFromHandle(int markerHandle) {
 321         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 322         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 323         mh.line = -1;
 324         NotifyModified(mh);
 325 }
 326
 327 void Document::DeleteAllMarks(int markerNum) {
 328         bool someChanges = false;
 329         for (int line = 0; line < LinesTotal(); line++) {
 330                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 331                         someChanges = true;
 332         }
 333         if (someChanges) {
 334                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 335                 mh.line = -1;
 336                 NotifyModified(mh);
 337         }
 338 }
 339
// Return the line that LineMarkers::LineFromHandle reports for markerHandle.
// NOTE(review): the value returned for an unknown handle is decided by
// LineMarkers - confirm there.
int Document::LineFromHandle(int markerHandle) {
        return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
}
 343
// Return the start position of the given line, as reported by the cell buffer.
Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
        return cb.LineStart(line);
}
 347
// True when position is exactly the start position of the line that contains it.
bool Document::IsLineStartPosition(int position) const {
        return LineStart(LineFromPosition(position)) == position;
}
 351
 352 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
 353         if (line >= LinesTotal() - 1) {
 354                 return LineStart(line + 1);
 355         } else {
 356                 int position = LineStart(line + 1);
 357                 if (SC_CP_UTF8 == dbcsCodePage) {
 358                         unsigned char bytes[] = {
 359                                 static_cast<unsigned char>(cb.CharAt(position-3)),
 360                                 static_cast<unsigned char>(cb.CharAt(position-2)),
 361                                 static_cast<unsigned char>(cb.CharAt(position-1)),
 362                         };
 363                         if (UTF8IsSeparator(bytes)) {
 364                                 return position - UTF8SeparatorLength;
 365                         }
 366                         if (UTF8IsNEL(bytes+1)) {
 367                                 return position - UTF8NELLength;
 368                         }
 369                 }
 370                 position--; // Back over CR or LF
 371                 // When line terminator is CR+LF, may need to go back one more
 372                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 373                         position--;
 374                 }
 375                 return position;
 376         }
 377 }
 378
 379 void SCI_METHOD Document::SetErrorStatus(int status) {
 380         // Tell the watchers an error has occurred.
 381         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 382                 it->watcher->NotifyErrorOccurred(this, it->userData, status);
 383         }
 384 }
 385
// Return the line that contains position pos, as reported by the cell buffer.
Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
        return cb.LineFromPosition(pos);
}
 389
// Return LineEnd() of the line that contains position, i.e. the end of that
// line's text before its line end characters.
int Document::LineEndPosition(int position) const {
        return LineEnd(LineFromPosition(position));
}
 393
// True when position is exactly the LineEnd() of the line that contains it.
bool Document::IsLineEndPosition(int position) const {
        return LineEnd(LineFromPosition(position)) == position;
}
 397
// True when position is at or after the LineEnd() of the line that contains it,
// so it is not before the end of that line's text.
bool Document::IsPositionInLineEnd(int position) const {
        return position >= LineEnd(LineFromPosition(position));
}
 401
 402 int Document::VCHomePosition(int position) const {
 403         int line = LineFromPosition(position);
 404         int startPosition = LineStart(line);
 405         int endLine = LineEnd(line);
 406         int startText = startPosition;
 407         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 408                 startText++;
 409         if (position == startText)
 410                 return startPosition;
 411         else
 412                 return startText;
 413 }
 414
 415 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
 416         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 417         if (prev != level) {
 418                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 419                                    LineStart(line), 0, 0, 0, line);
 420                 mh.foldLevelNow = level;
 421                 mh.foldLevelPrev = prev;
 422                 NotifyModified(mh);
 423         }
 424         return prev;
 425 }
 426
// Return the fold level stored for the given line, as reported by LineLevels::GetLevel.
// Callers in this file mask the result with SC_FOLDLEVELNUMBERMASK and test the
// SC_FOLDLEVELHEADERFLAG / SC_FOLDLEVELWHITEFLAG bits.
int SCI_METHOD Document::GetLevel(Sci_Position line) const {
        return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
}
 430
// Discard the stored fold levels by delegating to LineLevels::ClearLevels.
void Document::ClearLevels() {
        static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
}
 434
 435 static bool IsSubordinate(int levelStart, int levelTry) {
 436         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 437                 return true;
 438         else
 439                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 440 }
 441
// Find the last line that is subordinate to lineParent in the fold structure.
//   lineParent - line whose children are wanted.
//   level      - fold level number to compare children against; -1 means use
//                the level number of lineParent itself.
//   lastLine   - when not -1, the search stops once it has reached this line
//                (clamped to the last document line) and the line reached is not
//                flagged SC_FOLDLEVELWHITEFLAG.
// Returns lineParent when it has no subordinate lines.
int Document::GetLastChild(int lineParent, int level, int lastLine) {
        if (level == -1)
                level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
        int maxLine = LinesTotal();
        int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
        int lineMaxSubord = lineParent;
        while (lineMaxSubord < maxLine - 1) {
                // NOTE(review): presumably styles far enough that the fold level of the
                // next line is up to date before it is read - confirm in EnsureStyledTo.
                EnsureStyledTo(LineStart(lineMaxSubord + 2));
                if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
                        break;
                // Stop at the requested limit, but keep going across whitespace-flagged lines.
                if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
                        break;
                lineMaxSubord++;
        }
        if (lineMaxSubord > lineParent) {
                if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
                        // Have chewed up some whitespace that belongs to a parent so seek back
                        // (by a single line, and only if the line reached is whitespace-flagged)
                        if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
                                lineMaxSubord--;
                        }
                }
        }
        return lineMaxSubord;
}
 466
 467 int Document::GetFoldParent(int line) const {
 468         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 469         int lineLook = line - 1;
 470         while ((lineLook > 0) && (
 471                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 472                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 473               ) {
 474                 lineLook--;
 475         }
 476         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 477                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 478                 return lineLook;
 479         } else {
 480                 return -1;
 481         }
 482 }
 483
// Fill highlightDelimiter with the fold block surrounding 'line'.
// On success four fields are written: beginFoldBlock, endFoldBlock,
// firstChangeableLineBefore and firstChangeableLineAfter. When no enclosing fold
// header can be found, highlightDelimiter.Clear() is called instead.
//   line     - line whose fold block is wanted.
//   lastLine - passed on, as max(line, lastLine) + 1, as the limit argument of
//              GetLastChild.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
        int level = GetLevel(line);
        int lookLastLine = Platform::Maximum(line, lastLine) + 1;

        // Step 1: move up from 'line' while on whitespace-flagged lines, or on header
        // lines whose level number is not below that of the following line.
        int lookLine = line;
        int lookLineLevel = level;
        int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
        while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
                ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
                lookLineLevel = GetLevel(--lookLine);
                lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
        }

        // Step 2: the block begins at that line if it is a header, otherwise at its fold parent.
        int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
        if (beginFoldBlock == -1) {
                highlightDelimiter.Clear();
                return;
        }

        // Step 3: find where the block ends. If it ends before 'line', look through
        // earlier headers for one whose last child is exactly 'line'.
        int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
        int firstChangeableLineBefore = -1;
        if (endFoldBlock < line) {
                lookLine = beginFoldBlock - 1;
                lookLineLevel = GetLevel(lookLine);
                lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
                        if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
                                if (GetLastChild(lookLine, -1, lookLastLine) == line) {
                                        beginFoldBlock = lookLine;
                                        endFoldBlock = line;
                                        firstChangeableLineBefore = line - 1;
                                }
                        }
                        // Stop at a base-level line that follows a more deeply nested line.
                        if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
                                break;
                        lookLineLevel = GetLevel(--lookLine);
                        lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                }
        }
        // Step 4: if not already set, scan back from line - 1 to the block start for the
        // nearest line that is whitespace-flagged or nested deeper than 'line'.
        if (firstChangeableLineBefore == -1) {
                for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                        lookLine >= beginFoldBlock;
                        lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
                        if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
                                firstChangeableLineBefore = lookLine;
                                break;
                        }
                }
        }
        // Fall back to the line just before the block.
        if (firstChangeableLineBefore == -1)
                firstChangeableLineBefore = beginFoldBlock - 1;

        // Step 5: scan forward from line + 1 to the block end for the first header line
        // whose level number is below that of the line after it.
        int firstChangeableLineAfter = -1;
        for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
                lookLine <= endFoldBlock;
                lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
                if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
                        firstChangeableLineAfter = lookLine;
                        break;
                }
        }
        // Fall back to the line just after the block.
        if (firstChangeableLineAfter == -1)
                firstChangeableLineAfter = endFoldBlock + 1;

        highlightDelimiter.beginFoldBlock = beginFoldBlock;
        highlightDelimiter.endFoldBlock = endFoldBlock;
        highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
        highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
 553
// Restrict pos to the range 0..Length() using Platform::Clamp.
int Document::ClampPositionIntoDocument(int pos) const {
        return Platform::Clamp(pos, 0, Length());
}
 557
 558 bool Document::IsCrLf(int pos) const {
 559         if (pos < 0)
 560                 return false;
 561         if (pos >= (Length() - 1))
 562                 return false;
 563         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 564 }
 565
 566 int Document::LenChar(int pos) {
 567         if (pos < 0) {
 568                 return 1;
 569         } else if (IsCrLf(pos)) {
 570                 return 2;
 571         } else if (SC_CP_UTF8 == dbcsCodePage) {
 572                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 573                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 574                 int lengthDoc = Length();
 575                 if ((pos + widthCharBytes) > lengthDoc)
 576                         return lengthDoc - pos;
 577                 else
 578                         return widthCharBytes;
 579         } else if (dbcsCodePage) {
 580                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 581         } else {
 582                 return 1;
 583         }
 584 }
 585
 586 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 587         int trail = pos;
 588         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 589                 trail--;
 590         start = (trail > 0) ? trail-1 : trail;
 591
 592         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 593         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 594         if (widthCharBytes == 1) {
 595                 return false;
 596         } else {
 597                 int trailBytes = widthCharBytes - 1;
 598                 int len = pos - start;
 599                 if (len > trailBytes)
 600                         // pos too far from lead
 601                         return false;
 602                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 603                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 604                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 605                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 606                 if (utf8status & UTF8MaskInvalid)
 607                         return false;
 608                 end = start + widthCharBytes;
 609                 return true;
 610         }
 611 }
 612
// Normalise a position so that it is not part way through a multi-byte character.
// This can occur in these situations -
// When lines are terminated with \r\n pairs which should be treated as one character
// (only checked when checkLineEnd is true).
// When the document is UTF-8 and the position is on a trail byte of a valid character.
// When displaying DBCS text such as Japanese.
// If moving, move the position in the indicated direction: moveDir > 0 moves to
// the end of the character, any other value moves to its start.
// Positions at or before 0 return 0 and positions at or past Length() return Length().
int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
        //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
        // If out of range, just return minimum/maximum value.
        if (pos <= 0)
                return 0;
        if (pos >= Length())
                return Length();

        // PLATFORM_ASSERT(pos > 0 && pos < Length());
        // pos sits between the \r and the \n of a pair, so step off in the requested direction.
        if (checkLineEnd && IsCrLf(pos - 1)) {
                if (moveDir > 0)
                        return pos + 1;
                else
                        return pos - 1;
        }

        if (dbcsCodePage) {
                if (SC_CP_UTF8 == dbcsCodePage) {
                        unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
                        // If ch is not a trail byte then pos is valid intercharacter position
                        if (UTF8IsTrailByte(ch)) {
                                int startUTF = pos;
                                int endUTF = pos;
                                if (InGoodUTF8(pos, startUTF, endUTF)) {
                                        // ch is a trail byte within a UTF-8 character
                                        if (moveDir > 0)
                                                pos = endUTF;
                                        else
                                                pos = startUTF;
                                }
                                // Else invalid UTF-8 so return position of isolated trail byte
                        }
                } else {
                        // Anchor DBCS calculations at start of line because start of line can
                        // not be a DBCS trail byte.
                        int posStartLine = LineStart(LineFromPosition(pos));
                        if (pos == posStartLine)
                                return pos;

                        // Step back until a non-lead-byte is found.
                        int posCheck = pos;
                        while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
                                posCheck--;

                        // Check from known start of character.
                        // Walk forward one character at a time; if a character straddles pos,
                        // return one of its boundaries according to moveDir.
                        while (posCheck < pos) {
                                int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
                                if (posCheck + mbsize == pos) {
                                        return pos;
                                } else if (posCheck + mbsize > pos) {
                                        if (moveDir > 0) {
                                                return posCheck + mbsize;
                                        } else {
                                                return posCheck;
                                        }
                                }
                                posCheck += mbsize;
                        }
                }
        }

        return pos;
}
 681
// NextPosition moves between valid positions - it can not handle a position in the middle of a
// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
// A \r\n pair is treated as two characters.
// pos is the starting position; moveDir > 0 steps forwards and any other value steps backwards.
// The result is clamped to the range 0..Length().
int Document::NextPosition(int pos, int moveDir) const {
	// If out of range, just return minimum/maximum value.
	int increment = (moveDir > 0) ? 1 : -1;
	if (pos + increment <= 0)
		return 0;
	if (pos + increment >= Length())
		return Length();

	if (dbcsCodePage) {
		if (SC_CP_UTF8 == dbcsCodePage) {
			if (increment == 1) {
				// Simple forward movement case so can avoid some checks
				const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
				if (UTF8IsAscii(leadByte)) {
					// Single byte character or invalid
					pos++;
				} else {
					// Gather the bytes the lead byte claims and classify the sequence
					const int widthCharBytes = UTF8BytesOfLead[leadByte];
					char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
					for (int b=1; b<widthCharBytes; b++)
						charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
					int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
					if (utf8status & UTF8MaskInvalid)
						// Invalid sequence: step over just the one byte
						pos++;
					else
						// Valid sequence: step over the width reported by the classifier
						pos += utf8status & UTF8MaskWidth;
				}
			} else {
				// Examine byte before position
				pos--;
				unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
				// If ch is not a trail byte then pos is valid intercharacter position
				if (UTF8IsTrailByte(ch)) {
					// If ch is a trail byte in a valid UTF-8 character then return start of character
					int startUTF = pos;
					int endUTF = pos;
					if (InGoodUTF8(pos, startUTF, endUTF)) {
						pos = startUTF;
					}
					// Else invalid UTF-8 so return position of isolated trail byte
				}
			}
		} else {
			if (moveDir > 0) {
				// Forwards in DBCS: a lead byte takes its trail byte with it
				int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
				pos += mbsize;
				// A lead byte as the final byte would otherwise step past the end
				if (pos > Length())
					pos = Length();
			} else {
				// Anchor DBCS calculations at start of line because start of line can
				// not be a DBCS trail byte.
				int posStartLine = LineStart(LineFromPosition(pos));
				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
				if ((pos - 1) <= posStartLine) {
					// At most one byte lies between the line start and pos
					return pos - 1;
				} else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
					// Must actually be trail byte
					return pos - 2;
				} else {
					// Otherwise, step back until a non-lead-byte is found.
					int posTemp = pos - 1;
					while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
						;
					// Now posTemp+1 must point to the beginning of a character,
					// so figure out whether we went back an even or an odd
					// number of bytes and go back 1 or 2 bytes, respectively.
					return (pos - 1 - ((pos - posTemp) & 1));
				}
			}
		}
	} else {
		// Single byte encoding: every byte is a character
		pos += increment;
	}

	return pos;
}
 762
 763 bool Document::NextCharacter(int &pos, int moveDir) const {
 764         // Returns true if pos changed
 765         int posNext = NextPosition(pos, moveDir);
 766         if (posNext == pos) {
 767                 return false;
 768         } else {
 769                 pos = posNext;
 770                 return true;
 771         }
 772 }
 773
 774 // Return -1  on out-of-bounds
 775 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
 776         int pos = positionStart;
 777         if (dbcsCodePage) {
 778                 const int increment = (characterOffset > 0) ? 1 : -1;
 779                 while (characterOffset != 0) {
 780                         const int posNext = NextPosition(pos, increment);
 781                         if (posNext == pos)
 782                                 return INVALID_POSITION;
 783                         pos = posNext;
 784                         characterOffset -= increment;
 785                 }
 786         } else {
 787                 pos = positionStart + characterOffset;
 788                 if ((pos < 0) || (pos > Length()))
 789                         return INVALID_POSITION;
 790         }
 791         return pos;
 792 }
 793
 794 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
 795         int pos = positionStart;
 796         if (dbcsCodePage) {
 797                 const int increment = (characterOffset > 0) ? 1 : -1;
 798                 while (characterOffset != 0) {
 799                         const int posNext = NextPosition(pos, increment);
 800                         if (posNext == pos)
 801                                 return INVALID_POSITION;
 802                         if (abs(pos-posNext) > 3)       // 4 byte character = 2*UTF16.
 803                                 characterOffset -= increment;
 804                         pos = posNext;
 805                         characterOffset -= increment;
 806                 }
 807         } else {
 808                 pos = positionStart + characterOffset;
 809                 if ((pos < 0) || (pos > Length()))
 810                         return INVALID_POSITION;
 811         }
 812         return pos;
 813 }
 814
 815 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
 816         int character;
 817         int bytesInCharacter = 1;
 818         if (dbcsCodePage) {
 819                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 820                 if (SC_CP_UTF8 == dbcsCodePage) {
 821                         if (UTF8IsAscii(leadByte)) {
 822                                 // Single byte character or invalid
 823                                 character =  leadByte;
 824                         } else {
 825                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 826                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 827                                 for (int b=1; b<widthCharBytes; b++)
 828                                         charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 829                                 int utf8status = UTF8Classify(charBytes, widthCharBytes);
 830                                 if (utf8status & UTF8MaskInvalid) {
 831                                         // Report as singleton surrogate values which are invalid Unicode
 832                                         character =  0xDC80 + leadByte;
 833                                 } else {
 834                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 835                                         character = UnicodeFromUTF8(charBytes);
 836                                 }
 837                         }
 838                 } else {
 839                         if (IsDBCSLeadByte(leadByte)) {
 840                                 bytesInCharacter = 2;
 841                                 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 842                         } else {
 843                                 character = leadByte;
 844                         }
 845                 }
 846         } else {
 847                 character = cb.CharAt(position);
 848         }
 849         if (pWidth) {
 850                 *pWidth = bytesInCharacter;
 851         }
 852         return character;
 853 }
 854
 855 int SCI_METHOD Document::CodePage() const {
 856         return dbcsCodePage;
 857 }
 858
 859 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 860         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 861         unsigned char uch = static_cast<unsigned char>(ch);
 862         switch (dbcsCodePage) {
 863                 case 932:
 864                         // Shift_jis
 865                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 866                                 ((uch >= 0xE0) && (uch <= 0xFC));
 867                                 // Lead bytes F0 to FC may be a Microsoft addition.
 868                 case 936:
 869                         // GBK
 870                         return (uch >= 0x81) && (uch <= 0xFE);
 871                 case 949:
 872                         // Korean Wansung KS C-5601-1987
 873                         return (uch >= 0x81) && (uch <= 0xFE);
 874                 case 950:
 875                         // Big5
 876                         return (uch >= 0x81) && (uch <= 0xFE);
 877                 case 1361:
 878                         // Korean Johab KS C-5601-1992
 879                         return
 880                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 881                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 882                                 ((uch >= 0xE0) && (uch <= 0xF9));
 883         }
 884         return false;
 885 }
 886
 887 static inline bool IsSpaceOrTab(int ch) {
 888         return ch == ' ' || ch == '\t';
 889 }
 890
 891 // Need to break text into segments near lengthSegment but taking into
 892 // account the encoding to not break inside a UTF-8 or DBCS character
 893 // and also trying to avoid breaking inside a pair of combining characters.
 894 // The segment length must always be long enough (more than 4 bytes)
 895 // so that there will be at least one whole character to make a segment.
 896 // For UTF-8, text must consist only of valid whole characters.
 897 // In preference order from best to worst:
 898 //   1) Break after space
 899 //   2) Break before punctuation
 900 //   3) Break after whole character
 901
 902 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
 903         if (length <= lengthSegment)
 904                 return length;
 905         int lastSpaceBreak = -1;
 906         int lastPunctuationBreak = -1;
 907         int lastEncodingAllowedBreak = 0;
 908         for (int j=0; j < lengthSegment;) {
 909                 unsigned char ch = static_cast<unsigned char>(text[j]);
 910                 if (j > 0) {
 911                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 912                                 lastSpaceBreak = j;
 913                         }
 914                         if (ch < 'A') {
 915                                 lastPunctuationBreak = j;
 916                         }
 917                 }
 918                 lastEncodingAllowedBreak = j;
 919
 920                 if (dbcsCodePage == SC_CP_UTF8) {
 921                         j += UTF8BytesOfLead[ch];
 922                 } else if (dbcsCodePage) {
 923                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 924                 } else {
 925                         j++;
 926                 }
 927         }
 928         if (lastSpaceBreak >= 0) {
 929                 return lastSpaceBreak;
 930         } else if (lastPunctuationBreak >= 0) {
 931                 return lastPunctuationBreak;
 932         }
 933         return lastEncodingAllowedBreak;
 934 }
 935
 936 EncodingFamily Document::CodePageFamily() const {
 937         if (SC_CP_UTF8 == dbcsCodePage)
 938                 return efUnicode;
 939         else if (dbcsCodePage)
 940                 return efDBCS;
 941         else
 942                 return efEightBit;
 943 }
 944
 945 void Document::ModifiedAt(int pos) {
 946         if (endStyled > pos)
 947                 endStyled = pos;
 948 }
 949
 950 void Document::CheckReadOnly() {
 951         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 952                 enteredReadOnlyCount++;
 953                 NotifyModifyAttempt();
 954                 enteredReadOnlyCount--;
 955         }
 956 }
 957
 958 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 959 // SetStyleAt does not change the persistent state of a document
 960
 961 bool Document::DeleteChars(int pos, int len) {
 962         if (pos < 0)
 963                 return false;
 964         if (len <= 0)
 965                 return false;
 966         if ((pos + len) > Length())
 967                 return false;
 968         CheckReadOnly();
 969         if (enteredModification != 0) {
 970                 return false;
 971         } else {
 972                 enteredModification++;
 973                 if (!cb.IsReadOnly()) {
 974                         NotifyModified(
 975                             DocModification(
 976                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 977                                 pos, len,
 978                                 0, 0));
 979                         int prevLinesTotal = LinesTotal();
 980                         bool startSavePoint = cb.IsSavePoint();
 981                         bool startSequence = false;
 982                         const char *text = cb.DeleteChars(pos, len, startSequence);
 983                         if (startSavePoint && cb.IsCollectingUndo())
 984                                 NotifySavePoint(!startSavePoint);
 985                         if ((pos < Length()) || (pos == 0))
 986                                 ModifiedAt(pos);
 987                         else
 988                                 ModifiedAt(pos-1);
 989                         NotifyModified(
 990                             DocModification(
 991                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 992                                 pos, len,
 993                                 LinesTotal() - prevLinesTotal, text));
 994                 }
 995                 enteredModification--;
 996         }
 997         return !cb.IsReadOnly();
 998 }
 999
/**
 * Insert a string with a length.
 * @param position Byte position at which to insert.
 * @param s Text to insert; insertLength bytes are used.
 * @param insertLength Number of bytes to insert.
 * @return The number of bytes actually inserted, which differs from insertLength
 *  when a SC_MOD_INSERTCHECK handler replaced the text through ChangeInsertion.
 *  Returns 0 when nothing was inserted: non-positive length, read-only buffer,
 *  or a call made while another modification is in progress.
 */
int Document::InsertString(int position, const char *s, int insertLength) {
	if (insertLength <= 0) {
		return 0;
	}
	CheckReadOnly();	// Application may change read only state here
	if (cb.IsReadOnly()) {
		return 0;
	}
	// Refuse reentrant modification, such as from inside a notification handler
	if (enteredModification != 0) {
		return 0;
	}
	enteredModification++;
	// Give watchers a chance to substitute different text via ChangeInsertion
	insertionSet = false;
	insertion.clear();
	NotifyModified(
		DocModification(
			SC_MOD_INSERTCHECK,
			position, insertLength,
			0, s));
	if (insertionSet) {
		// From here on use the replacement text instead of the caller's text
		s = insertion.c_str();
		insertLength = static_cast<int>(insertion.length());
	}
	NotifyModified(
		DocModification(
			SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
			position, insertLength,
			0, s));
	// Remember state from before the change so the differences can be reported
	int prevLinesTotal = LinesTotal();
	bool startSavePoint = cb.IsSavePoint();
	bool startSequence = false;
	const char *text = cb.InsertString(position, s, insertLength, startSequence);
	// startSavePoint is true here so this reports that the save point was left
	if (startSavePoint && cb.IsCollectingUndo())
		NotifySavePoint(!startSavePoint);
	ModifiedAt(position);
	NotifyModified(
		DocModification(
			SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
			position, insertLength,
			LinesTotal() - prevLinesTotal, text));
	if (insertionSet) {	// Free memory as could be large
		std::string().swap(insertion);
	}
	enteredModification--;
	return insertLength;
}
1049
1050 void Document::ChangeInsertion(const char *s, int length) {
1051         insertionSet = true;
1052         insertion.assign(s, length);
1053 }
1054
1055 int SCI_METHOD Document::AddData(char *data, Sci_Position length) {
1056         try {
1057                 int position = Length();
1058                 InsertString(position, data, length);
1059         } catch (std::bad_alloc &) {
1060                 return SC_STATUS_BADALLOC;
1061         } catch (...) {
1062                 return SC_STATUS_FAILURE;
1063         }
1064         return 0;
1065 }
1066
1067 void * SCI_METHOD Document::ConvertToDocument() {
1068         return this;
1069 }
1070
// Undo the steps reported by cb.StartUndo(), sending a before and an after
// notification for each step.
// Returns the position associated with the last text step undone (after the
// restored text when text was re-inserted), or -1 when no text step was undone.
int Document::Undo() {
	int newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			int steps = cb.StartUndo();
			//Platform::DebugPrintf("Steps=%d\n", steps);
			// Track a run of adjacent removals being restored so that newPos
			// can be placed after the whole restored run
			int coalescedRemovePos = -1;
			int coalescedRemoveLen = 0;
			int prevRemoveActionPos = -1;
			int prevRemoveActionLen = 0;
			for (int step = 0; step < steps; step++) {
				const int prevLinesTotal = LinesTotal();
				const Action &action = cb.GetUndoStep();
				// Send the 'before' notification: undoing a removal inserts text
				// and undoing an insertion deletes text
				if (action.at == removeAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
				} else if (action.at == containerAction) {
					// Container actions carry a token in the position field
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
					dm.token = action.position;
					NotifyModified(dm);
					if (!action.mayCoalesce) {
						// A non-coalescing container action ends any run of removals
						coalescedRemovePos = -1;
						coalescedRemoveLen = 0;
						prevRemoveActionPos = -1;
						prevRemoveActionLen = 0;
					}
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
				}
				cb.PerformUndoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				int modFlags = SC_PERFORMED_UNDO;
				// With undo, the notification is the inverse of the recorded action:
				// a removal is reported as inserted text and an insertion as deleted text
				if (action.at == removeAction) {
					newPos += action.lenData;
					modFlags |= SC_MOD_INSERTTEXT;
					// Extend the current run when this removal is at the same position
					// as the previous one or directly after it
					if ((coalescedRemoveLen > 0) &&
						(action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
						coalescedRemoveLen += action.lenData;
						newPos = coalescedRemovePos + coalescedRemoveLen;
					} else {
						// Start a new run at this removal
						coalescedRemovePos = action.position;
						coalescedRemoveLen = action.lenData;
					}
					prevRemoveActionPos = action.position;
					prevRemoveActionLen = action.lenData;
				} else if (action.at == insertAction) {
					modFlags |= SC_MOD_DELETETEXT;
					// An insertion being undone ends any run of removals
					coalescedRemovePos = -1;
					coalescedRemoveLen = 0;
					prevRemoveActionPos = -1;
					prevRemoveActionLen = 0;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const int linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				if (step == steps - 1) {
					// Final step: flag it and report whether any step changed the line count
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(DocModification(modFlags, action.position, action.lenData,
											   linesAdded, action.data));
			}

			// Report a save point change only when the state differs from the start
			bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1155
// Redo the steps reported by cb.StartRedo(), sending a before and an after
// notification for each step.
// Returns the position associated with the last text step redone (after the
// inserted text for an insertion), or -1 when no text step was redone.
int Document::Redo() {
	int newPos = -1;
	CheckReadOnly();
	if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
		enteredModification++;
		if (!cb.IsReadOnly()) {
			bool startSavePoint = cb.IsSavePoint();
			bool multiLine = false;
			int steps = cb.StartRedo();
			for (int step = 0; step < steps; step++) {
				const int prevLinesTotal = LinesTotal();
				const Action &action = cb.GetRedoStep();
				// Send the 'before' notification matching the recorded action
				if (action.at == insertAction) {
					NotifyModified(DocModification(
									SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
				} else if (action.at == containerAction) {
					// Container actions carry a token in the position field
					DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
					dm.token = action.position;
					NotifyModified(dm);
				} else {
					NotifyModified(DocModification(
									SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
				}
				cb.PerformRedoStep();
				if (action.at != containerAction) {
					ModifiedAt(action.position);
					newPos = action.position;
				}

				int modFlags = SC_PERFORMED_REDO;
				if (action.at == insertAction) {
					// Place the returned position after the re-inserted text
					newPos += action.lenData;
					modFlags |= SC_MOD_INSERTTEXT;
				} else if (action.at == removeAction) {
					modFlags |= SC_MOD_DELETETEXT;
				}
				if (steps > 1)
					modFlags |= SC_MULTISTEPUNDOREDO;
				const int linesAdded = LinesTotal() - prevLinesTotal;
				if (linesAdded != 0)
					multiLine = true;
				if (step == steps - 1) {
					// Final step: flag it and report whether any step changed the line count
					modFlags |= SC_LASTSTEPINUNDOREDO;
					if (multiLine)
						modFlags |= SC_MULTILINEUNDOREDO;
				}
				NotifyModified(
					DocModification(modFlags, action.position, action.lenData,
									linesAdded, action.data));
			}

			// Report a save point change only when the state differs from the start
			bool endSavePoint = cb.IsSavePoint();
			if (startSavePoint != endSavePoint)
				NotifySavePoint(endSavePoint);
		}
		enteredModification--;
	}
	return newPos;
}
1215
1216 void Document::DelChar(int pos) {
1217         DeleteChars(pos, LenChar(pos));
1218 }
1219
1220 void Document::DelCharBack(int pos) {
1221         if (pos <= 0) {
1222                 return;
1223         } else if (IsCrLf(pos - 2)) {
1224                 DeleteChars(pos - 2, 2);
1225         } else if (dbcsCodePage) {
1226                 int startChar = NextPosition(pos, -1);
1227                 DeleteChars(startChar, pos - startChar);
1228         } else {
1229                 DeleteChars(pos - 1, 1);
1230         }
1231 }
1232
// Return the first multiple of tabSize that is strictly greater than the
// start of the tab cell containing pos. tabSize must not be zero.
static int NextTab(int pos, int tabSize) {
	// pos - (pos % tabSize) is the start of the current tab cell
	const int cellStart = pos - (pos % tabSize);
	return cellStart + tabSize;
}
1236
/**
 * Build the white space needed to indent by a number of columns.
 * @param indent Indentation width in columns; values <= 0 give an empty string.
 * @param tabSize Width of a tab in columns.
 * @param insertSpaces When true only spaces are used; otherwise as many tabs
 *  as fit are used followed by spaces for the remainder.
 * @return The indentation string.
 * A tabSize that is not positive can not be used to measure tabs, so in that
 * case spaces are used regardless of insertSpaces instead of looping forever.
 */
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
	if (indent <= 0)
		return std::string();
	size_t tabs = 0;
	if (!insertSpaces && (tabSize > 0)) {
		tabs = static_cast<size_t>(indent / tabSize);
		indent %= tabSize;
	}
	std::string indentation(tabs, '\t');
	// Whatever does not fill a whole tab is made up with spaces
	indentation.append(static_cast<size_t>(indent), ' ');
	return indentation;
}
1251
1252 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1253         int indent = 0;
1254         if ((line >= 0) && (line < LinesTotal())) {
1255                 int lineStart = LineStart(line);
1256                 int length = Length();
1257                 for (int i = lineStart; i < length; i++) {
1258                         char ch = cb.CharAt(i);
1259                         if (ch == ' ')
1260                                 indent++;
1261                         else if (ch == '\t')
1262                                 indent = NextTab(indent, tabInChars);
1263                         else
1264                                 return indent;
1265                 }
1266         }
1267         return indent;
1268 }
1269
1270 int Document::SetLineIndentation(int line, int indent) {
1271         int indentOfLine = GetLineIndentation(line);
1272         if (indent < 0)
1273                 indent = 0;
1274         if (indent != indentOfLine) {
1275                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1276                 int thisLineStart = LineStart(line);
1277                 int indentPos = GetLineIndentPosition(line);
1278                 UndoGroup ug(this);
1279                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1280                 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1281                         static_cast<int>(linebuf.length()));
1282         } else {
1283                 return GetLineIndentPosition(line);
1284         }
1285 }
1286
1287 int Document::GetLineIndentPosition(int line) const {
1288         if (line < 0)
1289                 return 0;
1290         int pos = LineStart(line);
1291         int length = Length();
1292         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1293                 pos++;
1294         }
1295         return pos;
1296 }
1297
1298 int Document::GetColumn(int pos) {
1299         int column = 0;
1300         int line = LineFromPosition(pos);
1301         if ((line >= 0) && (line < LinesTotal())) {
1302                 for (int i = LineStart(line); i < pos;) {
1303                         char ch = cb.CharAt(i);
1304                         if (ch == '\t') {
1305                                 column = NextTab(column, tabInChars);
1306                                 i++;
1307                         } else if (ch == '\r') {
1308                                 return column;
1309                         } else if (ch == '\n') {
1310                                 return column;
1311                         } else if (i >= Length()) {
1312                                 return column;
1313                         } else {
1314                                 column++;
1315                                 i = NextPosition(i, 1);
1316                         }
1317                 }
1318         }
1319         return column;
1320 }
1321
1322 int Document::CountCharacters(int startPos, int endPos) const {
1323         startPos = MovePositionOutsideChar(startPos, 1, false);
1324         endPos = MovePositionOutsideChar(endPos, -1, false);
1325         int count = 0;
1326         int i = startPos;
1327         while (i < endPos) {
1328                 count++;
1329                 i = NextPosition(i, 1);
1330         }
1331         return count;
1332 }
1333
1334 int Document::CountUTF16(int startPos, int endPos) const {
1335         startPos = MovePositionOutsideChar(startPos, 1, false);
1336         endPos = MovePositionOutsideChar(endPos, -1, false);
1337         int count = 0;
1338         int i = startPos;
1339         while (i < endPos) {
1340                 count++;
1341                 const int next = NextPosition(i, 1);
1342                 if ((next - i) > 3)
1343                         count++;
1344                 i = next;
1345         }
1346         return count;
1347 }
1348
1349 int Document::FindColumn(int line, int column) {
1350         int position = LineStart(line);
1351         if ((line >= 0) && (line < LinesTotal())) {
1352                 int columnCurrent = 0;
1353                 while ((columnCurrent < column) && (position < Length())) {
1354                         char ch = cb.CharAt(position);
1355                         if (ch == '\t') {
1356                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1357                                 if (columnCurrent > column)
1358                                         return position;
1359                                 position++;
1360                         } else if (ch == '\r') {
1361                                 return position;
1362                         } else if (ch == '\n') {
1363                                 return position;
1364                         } else {
1365                                 columnCurrent++;
1366                                 position = NextPosition(position, 1);
1367                         }
1368                 }
1369         }
1370         return position;
1371 }
1372
1373 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1374         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1375         for (int line = lineBottom; line >= lineTop; line--) {
1376                 int indentOfLine = GetLineIndentation(line);
1377                 if (forwards) {
1378                         if (LineStart(line) < LineEnd(line)) {
1379                                 SetLineIndentation(line, indentOfLine + IndentSize());
1380                         }
1381                 } else {
1382                         SetLineIndentation(line, indentOfLine - IndentSize());
1383                 }
1384         }
1385 }
1386
1387 // Convert line endings for a piece of text to a particular mode.
1388 // Stop at len or when a NUL is found.
1389 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1390         std::string dest;
1391         for (size_t i = 0; (i < len) && (s[i]); i++) {
1392                 if (s[i] == '\n' || s[i] == '\r') {
1393                         if (eolModeWanted == SC_EOL_CR) {
1394                                 dest.push_back('\r');
1395                         } else if (eolModeWanted == SC_EOL_LF) {
1396                                 dest.push_back('\n');
1397                         } else { // eolModeWanted == SC_EOL_CRLF
1398                                 dest.push_back('\r');
1399                                 dest.push_back('\n');
1400                         }
1401                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1402                                 i++;
1403                         }
1404                 } else {
1405                         dest.push_back(s[i]);
1406                 }
1407         }
1408         return dest;
1409 }
1410
1411 void Document::ConvertLineEnds(int eolModeSet) {
1412         UndoGroup ug(this);
1413
1414         for (int pos = 0; pos < Length(); pos++) {
1415                 if (cb.CharAt(pos) == '\r') {
1416                         if (cb.CharAt(pos + 1) == '\n') {
1417                                 // CRLF
1418                                 if (eolModeSet == SC_EOL_CR) {
1419                                         DeleteChars(pos + 1, 1); // Delete the LF
1420                                 } else if (eolModeSet == SC_EOL_LF) {
1421                                         DeleteChars(pos, 1); // Delete the CR
1422                                 } else {
1423                                         pos++;
1424                                 }
1425                         } else {
1426                                 // CR
1427                                 if (eolModeSet == SC_EOL_CRLF) {
1428                                         pos += InsertString(pos + 1, "\n", 1); // Insert LF
1429                                 } else if (eolModeSet == SC_EOL_LF) {
1430                                         pos += InsertString(pos, "\n", 1); // Insert LF
1431                                         DeleteChars(pos, 1); // Delete CR
1432                                         pos--;
1433                                 }
1434                         }
1435                 } else if (cb.CharAt(pos) == '\n') {
1436                         // LF
1437                         if (eolModeSet == SC_EOL_CRLF) {
1438                                 pos += InsertString(pos, "\r", 1); // Insert CR
1439                         } else if (eolModeSet == SC_EOL_CR) {
1440                                 pos += InsertString(pos, "\r", 1); // Insert CR
1441                                 DeleteChars(pos, 1); // Delete LF
1442                                 pos--;
1443                         }
1444                 }
1445         }
1446
1447 }
1448
1449 bool Document::IsWhiteLine(int line) const {
1450         int currentChar = LineStart(line);
1451         int endLine = LineEnd(line);
1452         while (currentChar < endLine) {
1453                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1454                         return false;
1455                 }
1456                 ++currentChar;
1457         }
1458         return true;
1459 }
1460
1461 int Document::ParaUp(int pos) const {
1462         int line = LineFromPosition(pos);
1463         line--;
1464         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1465                 line--;
1466         }
1467         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1468                 line--;
1469         }
1470         line++;
1471         return LineStart(line);
1472 }
1473
1474 int Document::ParaDown(int pos) const {
1475         int line = LineFromPosition(pos);
1476         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1477                 line++;
1478         }
1479         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1480                 line++;
1481         }
1482         if (line < LinesTotal())
1483                 return LineStart(line);
1484         else // end of a document
1485                 return LineEnd(line-1);
1486 }
1487
1488 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1489         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1490                 return CharClassify::ccWord;
1491         return charClass.GetClass(ch);
1492 }
1493
1494 /**
1495  * Used by commmands that want to select whole words.
1496  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1497  */
1498 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1499         CharClassify::cc ccStart = CharClassify::ccWord;
1500         if (delta < 0) {
1501                 if (!onlyWordCharacters)
1502                         ccStart = WordCharClass(cb.CharAt(pos-1));
1503                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1504                         pos--;
1505         } else {
1506                 if (!onlyWordCharacters && pos < Length())
1507                         ccStart = WordCharClass(cb.CharAt(pos));
1508                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1509                         pos++;
1510         }
1511         return MovePositionOutsideChar(pos, delta, true);
1512 }
1513
1514 /**
1515  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1516  * (delta < 0).
1517  * This is looking for a transition between character classes although there is also some
1518  * additional movement to transit white space.
1519  * Used by cursor movement by word commands.
1520  */
1521 int Document::NextWordStart(int pos, int delta) {
1522         if (delta < 0) {
1523                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1524                         pos--;
1525                 if (pos > 0) {
1526                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1527                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1528                                 pos--;
1529                         }
1530                 }
1531         } else {
1532                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1533                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1534                         pos++;
1535                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1536                         pos++;
1537         }
1538         return pos;
1539 }
1540
1541 /**
1542  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1543  * (delta < 0).
1544  * This is looking for a transition between character classes although there is also some
1545  * additional movement to transit white space.
1546  * Used by cursor movement by word commands.
1547  */
1548 int Document::NextWordEnd(int pos, int delta) {
1549         if (delta < 0) {
1550                 if (pos > 0) {
1551                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1552                         if (ccStart != CharClassify::ccSpace) {
1553                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1554                                         pos--;
1555                                 }
1556                         }
1557                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1558                                 pos--;
1559                         }
1560                 }
1561         } else {
1562                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1563                         pos++;
1564                 }
1565                 if (pos < Length()) {
1566                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1567                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1568                                 pos++;
1569                         }
1570                 }
1571         }
1572         return pos;
1573 }
1574
1575 /**
1576  * Check that the character at the given position is a word or punctuation character and that
1577  * the previous character is of a different character class.
1578  */
1579 bool Document::IsWordStartAt(int pos) const {
1580         if (pos > 0) {
1581                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1582                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1583                         (ccPos != WordCharClass(CharAt(pos - 1)));
1584         }
1585         return true;
1586 }
1587
1588 /**
1589  * Check that the character at the given position is a word or punctuation character and that
1590  * the next character is of a different character class.
1591  */
1592 bool Document::IsWordEndAt(int pos) const {
1593         if (pos < Length()) {
1594                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1595                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1596                         (ccPrev != WordCharClass(CharAt(pos)));
1597         }
1598         return true;
1599 }
1600
1601 /**
1602  * Check that the given range is has transitions between character classes at both
1603  * ends and where the characters on the inside are word or punctuation characters.
1604  */
1605 bool Document::IsWordAt(int start, int end) const {
1606         return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1607 }
1608
1609 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1610         return (!word && !wordStart) ||
1611                         (word && IsWordAt(pos, pos + length)) ||
1612                         (wordStart && IsWordStartAt(pos));
1613 }
1614
1615 bool Document::HasCaseFolder(void) const {
1616         return pcf != 0;
1617 }
1618
1619 void Document::SetCaseFolder(CaseFolder *pcf_) {
1620         delete pcf;
1621         pcf = pcf_;
1622 }
1623
1624 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1625         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1626         if (UTF8IsAscii(leadByte)) {
1627                 // Common case: ASCII character
1628                 return CharacterExtracted(leadByte, 1);
1629         }
1630         const int widthCharBytes = UTF8BytesOfLead[leadByte];
1631         unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1632         for (int b=1; b<widthCharBytes; b++)
1633                 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1634         int utf8status = UTF8Classify(charBytes, widthCharBytes);
1635         if (utf8status & UTF8MaskInvalid) {
1636                 // Treat as invalid and use up just one byte
1637                 return CharacterExtracted(unicodeReplacementChar, 1);
1638         } else {
1639                 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1640         }
1641 }
1642
1643 /**
      * Find text in document, supporting both forward and backward
      * searches (just pass minPos > maxPos to do a backward search).
      * Has not been tested with backwards DBCS searches yet.
      *
      * search holds the text to look for and *length its length in bytes;
      * when *length is not positive minPos is returned immediately.
      * flags is tested for SCFIND_MATCHCASE, SCFIND_WHOLEWORD, SCFIND_WORDSTART
      * and SCFIND_REGEXP. Regular expression searches are handed to the regex
      * object, which is created on first use.
      * The case insensitive UTF-8 and DBCS branches write the byte length of
      * the matched document text back through length.
      * Returns the position of the match or -1 when nothing is found.
      * NOTE(review): pcf is dereferenced without a check in the case insensitive
      * branches - presumably callers install one with SetCaseFolder first; confirm.
      */
1648 long Document::FindText(int minPos, int maxPos, const char *search,
1649                         int flags, int *length) {
1650         if (*length <= 0)
1651                 return minPos;
1652         const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
1653         const bool word = (flags & SCFIND_WHOLEWORD) != 0;
1654         const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
1655         const bool regExp = (flags & SCFIND_REGEXP) != 0;
1656         if (regExp) {
                     // Create the regular expression searcher the first time it is needed
1657                 if (!regex)
1658                         regex = CreateRegexSearch(&charClass);
1659                 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1660         } else {
1661
1662                 const bool forward = minPos <= maxPos;
1663                 const int increment = forward ? 1 : -1;
1664
1665                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1666                 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1667                 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1668
1669                 // Compute actual search ranges needed
1670                 const int lengthFind = *length;
1671
1672                 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
                     // limitPos is the higher end of the range whichever way the search runs;
                     // a match may not extend past it
1673                 const int limitPos = Platform::Maximum(startPos, endPos);
1674                 int pos = startPos;
1675                 if (!forward) {
1676                         // Back all of a character
1677                         pos = NextPosition(pos, increment);
1678                 }
1679                 if (caseSensitive) {
                             // Case sensitive: compare document bytes directly with the search text.
                             // Going forward there is no point starting where the text cannot fit.
1680                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1681                         const char charStartSearch =  search[0];
1682                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                     // Only examine the rest when the first byte matches
1683                                 if (CharAt(pos) == charStartSearch) {
1684                                         bool found = (pos + lengthFind) <= limitPos;
1685                                         for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
1686                                                 found = CharAt(pos + indexSearch) == search[indexSearch];
1687                                         }
1688                                         if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1689                                                 return pos;
1690                                         }
1691                                 }
1692                                 if (!NextCharacter(pos, increment))
1693                                         break;
1694                         }
1695                 } else if (SC_CP_UTF8 == dbcsCodePage) {
                             // Case insensitive UTF-8: fold the search text once, then fold the
                             // document a character at a time and compare the folded bytes.
                             // The buffers allow each byte to grow by maxFoldingExpansion.
1696                         const size_t maxFoldingExpansion = 4;
1697                         std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
1698                         const int lenSearch = static_cast<int>(
1699                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1700                         char bytes[UTF8MaxBytes + 1];
1701                         char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
1702                         while (forward ? (pos < endPos) : (pos >= endPos)) {
                                     // Width of the character at pos, used to step forward after this attempt
1703                                 int widthFirstCharacter = 0;
1704                                 int posIndexDocument = pos;
1705                                 int indexSearch = 0;
1706                                 bool characterMatches = true;
1707                                 for (;;) {
                                             // Read one character from the document into bytes
1708                                         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
1709                                         bytes[0] = leadByte;
1710                                         int widthChar = 1;
1711                                         if (!UTF8IsAscii(leadByte)) {
1712                                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
1713                                                 for (int b=1; b<widthCharBytes; b++) {
1714                                                         bytes[b] = cb.CharAt(posIndexDocument+b);
1715                                                 }
1716                                                 widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
1717                                         }
1718                                         if (!widthFirstCharacter)
1719                                                 widthFirstCharacter = widthChar;
                                             // Stop when this character would run past the end of the range
1720                                         if ((posIndexDocument + widthChar) > limitPos)
1721                                                 break;
1722                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1723                                         folded[lenFlat] = 0;
1724                                         // Does folded match the buffer
1725                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1726                                         if (!characterMatches)
1727                                                 break;
1728                                         posIndexDocument += widthChar;
1729                                         indexSearch += lenFlat;
                                             // All of the folded search text has been consumed
1730                                         if (indexSearch >= lenSearch)
1731                                                 break;
1732                                 }
1733                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1734                                         if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
                                                     // Report the length of the document text that matched
1735                                                 *length = posIndexDocument - pos;
1736                                                 return pos;
1737                                         }
1738                                 }
1739                                 if (forward) {
1740                                         pos += widthFirstCharacter;
1741                                 } else {
1742                                         if (!NextCharacter(pos, increment))
1743                                                 break;
1744                                 }
1745                         }
1746                 } else if (dbcsCodePage) {
                             // Case insensitive DBCS: same approach as UTF-8 but characters are
                             // one or two bytes wide as decided by IsDBCSLeadByte
1747                         const size_t maxBytesCharacter = 2;
1748                         const size_t maxFoldingExpansion = 4;
1749                         std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1750                         const int lenSearch = static_cast<int>(
1751                                 pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
1752                         while (forward ? (pos < endPos) : (pos >= endPos)) {
1753                                 int indexDocument = 0;
1754                                 int indexSearch = 0;
1755                                 bool characterMatches = true;
1756                                 while (characterMatches &&
1757                                         ((pos + indexDocument) < limitPos) &&
1758                                         (indexSearch < lenSearch)) {
1759                                         char bytes[maxBytesCharacter + 1];
1760                                         bytes[0] = cb.CharAt(pos + indexDocument);
1761                                         const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1762                                         if (widthChar == 2)
1763                                                 bytes[1] = cb.CharAt(pos + indexDocument + 1);
                                             // Stop when this character would run past the end of the range
1764                                         if ((pos + indexDocument + widthChar) > limitPos)
1765                                                 break;
1766                                         char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1767                                         const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
1768                                         folded[lenFlat] = 0;
1769                                         // Does folded match the buffer
1770                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1771                                         indexDocument += widthChar;
1772                                         indexSearch += lenFlat;
1773                                 }
1774                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1775                                         if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                     // Report the length of the document text that matched
1776                                                 *length = indexDocument;
1777                                                 return pos;
1778                                         }
1779                                 }
1780                                 if (!NextCharacter(pos, increment))
1781                                         break;
1782                         }
1783                 } else {
                             // Case insensitive single byte encoding: fold the search text once
                             // then fold each document byte and compare one for one
1784                         const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1785                         std::vector<char> searchThing(lengthFind + 1);
1786                         pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1787                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1788                                 bool found = (pos + lengthFind) <= limitPos;
1789                                 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1790                                         char ch = CharAt(pos + indexSearch);
1791                                         char folded[2];
1792                                         pcf->Fold(folded, sizeof(folded), &ch, 1);
1793                                         found = folded[0] == searchThing[indexSearch];
1794                                 }
1795                                 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1796                                         return pos;
1797                                 }
1798                                 if (!NextCharacter(pos, increment))
1799                                         break;
1800                         }
1801                 }
1802         }
1803         //Platform::DebugPrintf("Not found\n");
1804         return -1;
1805 }
1806
1807 const char *Document::SubstituteByPosition(const char *text, int *length) {
1808         if (regex)
1809                 return regex->SubstituteByPosition(this, text, length);
1810         else
1811                 return 0;
1812 }
1813
1814 int Document::LinesTotal() const {
1815         return cb.Lines();
1816 }
1817
1818 void Document::SetDefaultCharClasses(bool includeWordClass) {
1819     charClass.SetDefaultCharClasses(includeWordClass);
1820 }
1821
1822 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1823     charClass.SetCharClasses(chars, newCharClass);
1824 }
1825
1826 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1827     return charClass.GetCharsOfClass(characterClass, buffer);
1828 }
1829
1830 void SCI_METHOD Document::StartStyling(Sci_Position position, char) {
1831         endStyled = position;
1832 }
1833
1834 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
1835         if (enteredStyling != 0) {
1836                 return false;
1837         } else {
1838                 enteredStyling++;
1839                 int prevEndStyled = endStyled;
1840                 if (cb.SetStyleFor(endStyled, length, style)) {
1841                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1842                                            prevEndStyled, length);
1843                         NotifyModified(mh);
1844                 }
1845                 endStyled += length;
1846                 enteredStyling--;
1847                 return true;
1848         }
1849 }
1850
1851 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
1852         if (enteredStyling != 0) {
1853                 return false;
1854         } else {
1855                 enteredStyling++;
1856                 bool didChange = false;
1857                 int startMod = 0;
1858                 int endMod = 0;
1859                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1860                         PLATFORM_ASSERT(endStyled < Length());
1861                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
1862                                 if (!didChange) {
1863                                         startMod = endStyled;
1864                                 }
1865                                 didChange = true;
1866                                 endMod = endStyled;
1867                         }
1868                 }
1869                 if (didChange) {
1870                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1871                                            startMod, endMod - startMod + 1);
1872                         NotifyModified(mh);
1873                 }
1874                 enteredStyling--;
1875                 return true;
1876         }
1877 }
1878
1879 void Document::EnsureStyledTo(int pos) {
1880         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1881                 IncrementStyleClock();
1882                 if (pli && !pli->UseContainerLexing()) {
1883                         int lineEndStyled = LineFromPosition(GetEndStyled());
1884                         int endStyledTo = LineStart(lineEndStyled);
1885                         pli->Colourise(endStyledTo, pos);
1886                 } else {
1887                         // Ask the watchers to style, and stop as soon as one responds.
1888                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1889                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1890                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1891                         }
1892                 }
1893         }
1894 }
1895
1896 void Document::StyleToAdjustingLineDuration(int pos) {
1897         // Place bounds on the duration used to avoid glitches spiking it
1898         // and so causing slow styling or non-responsive scrolling
1899         const double minDurationOneLine = 0.000001;
1900         const double maxDurationOneLine = 0.0001;
1901
1902         // Alpha value for exponential smoothing.
1903         // Most recent value contributes 25% to smoothed value.
1904         const double alpha = 0.25;
1905
1906         const Sci_Position lineFirst = LineFromPosition(GetEndStyled());
1907         ElapsedTime etStyling;
1908         EnsureStyledTo(pos);
1909         const double durationStyling = etStyling.Duration();
1910         const Sci_Position lineLast = LineFromPosition(GetEndStyled());
1911         if (lineLast >= lineFirst + 8) {
1912                 // Only adjust for styling multiple lines to avoid instability
1913                 const double durationOneLine = durationStyling / (lineLast - lineFirst);
1914                 durationStyleOneLine = alpha * durationOneLine + (1.0 - alpha) * durationStyleOneLine;
1915                 if (durationStyleOneLine < minDurationOneLine) {
1916                         durationStyleOneLine = minDurationOneLine;
1917                 } else if (durationStyleOneLine > maxDurationOneLine) {
1918                         durationStyleOneLine = maxDurationOneLine;
1919                 }
1920         }
1921 }
1922
1923 void Document::LexerChanged() {
1924         // Tell the watchers the lexer has changed.
1925         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1926                 it->watcher->NotifyLexerChanged(this, it->userData);
1927         }
1928 }
1929
1930 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
1931         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1932         if (state != statePrevious) {
1933                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1934                 NotifyModified(mh);
1935         }
1936         return statePrevious;
1937 }
1938
1939 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
1940         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1941 }
1942
1943 int Document::GetMaxLineState() {
1944         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1945 }
1946
1947 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
1948         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1949         NotifyModified(mh);
1950 }
1951
1952 StyledText Document::MarginStyledText(int line) const {
1953         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1954         return StyledText(pla->Length(line), pla->Text(line),
1955                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1956 }
1957
1958 void Document::MarginSetText(int line, const char *text) {
1959         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1960         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1961         NotifyModified(mh);
1962 }
1963
1964 void Document::MarginSetStyle(int line, int style) {
1965         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1966         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1967 }
1968
1969 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1970         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1971         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1972 }
1973
1974 void Document::MarginClearAll() {
1975         int maxEditorLine = LinesTotal();
1976         for (int l=0; l<maxEditorLine; l++)
1977                 MarginSetText(l, 0);
1978         // Free remaining data
1979         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1980 }
1981
1982 StyledText Document::AnnotationStyledText(int line) const {
1983         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1984         return StyledText(pla->Length(line), pla->Text(line),
1985                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1986 }
1987
1988 void Document::AnnotationSetText(int line, const char *text) {
1989         if (line >= 0 && line < LinesTotal()) {
1990                 const int linesBefore = AnnotationLines(line);
1991                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1992                 const int linesAfter = AnnotationLines(line);
1993                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1994                 mh.annotationLinesAdded = linesAfter - linesBefore;
1995                 NotifyModified(mh);
1996         }
1997 }
1998
1999 void Document::AnnotationSetStyle(int line, int style) {
2000         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
2001         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
2002         NotifyModified(mh);
2003 }
2004
2005 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
2006         if (line >= 0 && line < LinesTotal()) {
2007                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
2008         }
2009 }
2010
2011 int Document::AnnotationLines(int line) const {
2012         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
2013 }
2014
2015 void Document::AnnotationClearAll() {
2016         int maxEditorLine = LinesTotal();
2017         for (int l=0; l<maxEditorLine; l++)
2018                 AnnotationSetText(l, 0);
2019         // Free remaining data
2020         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
2021 }
2022
2023 void Document::IncrementStyleClock() {
2024         styleClock = (styleClock + 1) % 0x100000;
2025 }
2026
2027 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2028         if (decorations.FillRange(position, value, fillLength)) {
2029                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2030                                                         position, fillLength);
2031                 NotifyModified(mh);
2032         }
2033 }
2034
2035 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2036         WatcherWithUserData wwud(watcher, userData);
2037         std::vector<WatcherWithUserData>::iterator it =
2038                 std::find(watchers.begin(), watchers.end(), wwud);
2039         if (it != watchers.end())
2040                 return false;
2041         watchers.push_back(wwud);
2042         return true;
2043 }
2044
2045 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2046         std::vector<WatcherWithUserData>::iterator it =
2047                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2048         if (it != watchers.end()) {
2049                 watchers.erase(it);
2050                 return true;
2051         }
2052         return false;
2053 }
2054
2055 void Document::NotifyModifyAttempt() {
2056         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2057                 it->watcher->NotifyModifyAttempt(this, it->userData);
2058         }
2059 }
2060
2061 void Document::NotifySavePoint(bool atSavePoint) {
2062         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2063                 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2064         }
2065 }
2066
2067 void Document::NotifyModified(DocModification mh) {
2068         if (mh.modificationType & SC_MOD_INSERTTEXT) {
2069                 decorations.InsertSpace(mh.position, mh.length);
2070         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2071                 decorations.DeleteRange(mh.position, mh.length);
2072         }
2073         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2074                 it->watcher->NotifyModified(this, mh, it->userData);
2075         }
2076 }
2077
2078 bool Document::IsWordPartSeparator(char ch) const {
2079         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2080 }
2081
2082 int Document::WordPartLeft(int pos) {
2083         if (pos > 0) {
2084                 --pos;
2085                 char startChar = cb.CharAt(pos);
2086                 if (IsWordPartSeparator(startChar)) {
2087                         while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
2088                                 --pos;
2089                         }
2090                 }
2091                 if (pos > 0) {
2092                         startChar = cb.CharAt(pos);
2093                         --pos;
2094                         if (IsLowerCase(startChar)) {
2095                                 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
2096                                         --pos;
2097                                 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
2098                                         ++pos;
2099                         } else if (IsUpperCase(startChar)) {
2100                                 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
2101                                         --pos;
2102                                 if (!IsUpperCase(cb.CharAt(pos)))
2103                                         ++pos;
2104                         } else if (IsADigit(startChar)) {
2105                                 while (pos > 0 && IsADigit(cb.CharAt(pos)))
2106                                         --pos;
2107                                 if (!IsADigit(cb.CharAt(pos)))
2108                                         ++pos;
2109                         } else if (IsPunctuation(startChar)) {
2110                                 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
2111                                         --pos;
2112                                 if (!IsPunctuation(cb.CharAt(pos)))
2113                                         ++pos;
2114                         } else if (isspacechar(startChar)) {
2115                                 while (pos > 0 && isspacechar(cb.CharAt(pos)))
2116                                         --pos;
2117                                 if (!isspacechar(cb.CharAt(pos)))
2118                                         ++pos;
2119                         } else if (!IsASCII(startChar)) {
2120                                 while (pos > 0 && !IsASCII(cb.CharAt(pos)))
2121                                         --pos;
2122                                 if (IsASCII(cb.CharAt(pos)))
2123                                         ++pos;
2124                         } else {
2125                                 ++pos;
2126                         }
2127                 }
2128         }
2129         return pos;
2130 }
2131
2132 int Document::WordPartRight(int pos) {
2133         char startChar = cb.CharAt(pos);
2134         int length = Length();
2135         if (IsWordPartSeparator(startChar)) {
2136                 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
2137                         ++pos;
2138                 startChar = cb.CharAt(pos);
2139         }
2140         if (!IsASCII(startChar)) {
2141                 while (pos < length && !IsASCII(cb.CharAt(pos)))
2142                         ++pos;
2143         } else if (IsLowerCase(startChar)) {
2144                 while (pos < length && IsLowerCase(cb.CharAt(pos)))
2145                         ++pos;
2146         } else if (IsUpperCase(startChar)) {
2147                 if (IsLowerCase(cb.CharAt(pos + 1))) {
2148                         ++pos;
2149                         while (pos < length && IsLowerCase(cb.CharAt(pos)))
2150                                 ++pos;
2151                 } else {
2152                         while (pos < length && IsUpperCase(cb.CharAt(pos)))
2153                                 ++pos;
2154                 }
2155                 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
2156                         --pos;
2157         } else if (IsADigit(startChar)) {
2158                 while (pos < length && IsADigit(cb.CharAt(pos)))
2159                         ++pos;
2160         } else if (IsPunctuation(startChar)) {
2161                 while (pos < length && IsPunctuation(cb.CharAt(pos)))
2162                         ++pos;
2163         } else if (isspacechar(startChar)) {
2164                 while (pos < length && isspacechar(cb.CharAt(pos)))
2165                         ++pos;
2166         } else {
2167                 ++pos;
2168         }
2169         return pos;
2170 }
2171
/**
 * True for carriage return and line feed, false for anything else.
 */
static bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
2175
2176 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2177         int sStart = cb.StyleAt(pos);
2178         if (delta < 0) {
2179                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2180                         pos--;
2181                 pos++;
2182         } else {
2183                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2184                         pos++;
2185         }
2186         return pos;
2187 }
2188
/**
 * Return the partner of a brace character, for the pairs () [] {} and <>.
 * Any other character, including NUL, gives '\0'.
 */
static char BraceOpposite(char ch) {
	// Partners sit next to each other, so flipping the low bit of the
	// index moves from one member of a pair to the other.
	static const char bracePairs[] = "()[]{}<>";
	for (int i = 0; bracePairs[i] != '\0'; i++) {
		if (bracePairs[i] == ch)
			return bracePairs[i ^ 1];
	}
	return '\0';
}
2211
2212 // TODO: should be able to extend styled region to find matching brace
2213 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2214         char chBrace = CharAt(position);
2215         char chSeek = BraceOpposite(chBrace);
2216         if (chSeek == '\0')
2217                 return - 1;
2218         const int styBrace = StyleIndexAt(position);
2219         int direction = -1;
2220         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2221                 direction = 1;
2222         int depth = 1;
2223         position = NextPosition(position, direction);
2224         while ((position >= 0) && (position < Length())) {
2225                 char chAtPos = CharAt(position);
2226                 const int styAtPos = StyleIndexAt(position);
2227                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2228                         if (chAtPos == chBrace)
2229                                 depth++;
2230                         if (chAtPos == chSeek)
2231                                 depth--;
2232                         if (depth == 0)
2233                                 return position;
2234                 }
2235                 int positionBeforeMove = position;
2236                 position = NextPosition(position, direction);
2237                 if (position == positionBeforeMove)
2238                         break;
2239         }
2240         return - 1;
2241 }
2242
2243 /**
2244  * Implementation of RegexSearchBase for the default built-in regular expression engine
2245  */
2246 class BuiltinRegex : public RegexSearchBase {
2247 public:
2248         explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}
2249
2250         virtual ~BuiltinRegex() {
2251         }
2252
2253         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2254                         bool caseSensitive, bool word, bool wordStart, int flags,
2255                         int *length);
2256
2257         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2258
2259 private:
2260         RESearch search;
2261         std::string substituted;
2262 };
2263
2264 namespace {
2265
2266 /**
2267 * RESearchRange keeps track of search range.
2268 */
2269 class RESearchRange {
2270 public:
2271         const Document *doc;
2272         int increment;
2273         int startPos;
2274         int endPos;
2275         int lineRangeStart;
2276         int lineRangeEnd;
2277         int lineRangeBreak;
2278         RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2279                 increment = (minPos <= maxPos) ? 1 : -1;
2280
2281                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2282                 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2283                 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2284
2285                 lineRangeStart = doc->LineFromPosition(startPos);
2286                 lineRangeEnd = doc->LineFromPosition(endPos);
2287                 if ((increment == 1) &&
2288                         (startPos >= doc->LineEnd(lineRangeStart)) &&
2289                         (lineRangeStart < lineRangeEnd)) {
2290                         // the start position is at end of line or between line end characters.
2291                         lineRangeStart++;
2292                         startPos = doc->LineStart(lineRangeStart);
2293                 } else if ((increment == -1) &&
2294                         (startPos <= doc->LineStart(lineRangeStart)) &&
2295                         (lineRangeStart > lineRangeEnd)) {
2296                         // the start position is at beginning of line.
2297                         lineRangeStart--;
2298                         startPos = doc->LineEnd(lineRangeStart);
2299                 }
2300                 lineRangeBreak = lineRangeEnd + increment;
2301         }
2302         Range LineRange(int line) const {
2303                 Range range(doc->LineStart(line), doc->LineEnd(line));
2304                 if (increment == 1) {
2305                         if (line == lineRangeStart)
2306                                 range.start = startPos;
2307                         if (line == lineRangeEnd)
2308                                 range.end = endPos;
2309                 } else {
2310                         if (line == lineRangeEnd)
2311                                 range.start = endPos;
2312                         if (line == lineRangeStart)
2313                                 range.end = startPos;
2314                 }
2315                 return range;
2316         }
2317 };
2318
2319 // Define a way for the Regular Expression code to access the document
2320 class DocumentIndexer : public CharacterIndexer {
2321         Document *pdoc;
2322         int end;
2323 public:
2324         DocumentIndexer(Document *pdoc_, int end_) :
2325                 pdoc(pdoc_), end(end_) {
2326         }
2327
2328         virtual ~DocumentIndexer() {
2329         }
2330
2331         virtual char CharAt(int index) {
2332                 if (index < 0 || index >= end)
2333                         return 0;
2334                 else
2335                         return pdoc->CharAt(index);
2336         }
2337 };
2338
2339 #ifdef CXX11_REGEX
2340
2341 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2342 public:
2343         const Document *doc;
2344         Position position;
2345         ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2346         }
2347         ByteIterator(const ByteIterator &other) {
2348                 doc = other.doc;
2349                 position = other.position;
2350         }
2351         ByteIterator &operator=(const ByteIterator &other) {
2352                 if (this != &other) {
2353                         doc = other.doc;
2354                         position = other.position;
2355                 }
2356                 return *this;
2357         }
2358         char operator*() const {
2359                 return doc->CharAt(position);
2360         }
2361         ByteIterator &operator++() {
2362                 position++;
2363                 return *this;
2364         }
2365         ByteIterator operator++(int) {
2366                 ByteIterator retVal(*this);
2367                 position++;
2368                 return retVal;
2369         }
2370         ByteIterator &operator--() {
2371                 position--;
2372                 return *this;
2373         }
2374         bool operator==(const ByteIterator &other) const {
2375                 return doc == other.doc && position == other.position;
2376         }
2377         bool operator!=(const ByteIterator &other) const {
2378                 return doc != other.doc || position != other.position;
2379         }
2380         int Pos() const {
2381                 return position;
2382         }
2383         int PosRoundUp() const {
2384                 return position;
2385         }
2386 };
2387
2388 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2389 // Would be better to use sizeof(wchar_t) or similar to differentiate
2390 // but easier for now to hard-code platforms.
2391 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2392 // appear to allow specializing basic_regex over these.
2393
2394 #ifdef _WIN32
2395 #define WCHAR_T_IS_16 1
2396 #else
2397 #define WCHAR_T_IS_16 0
2398 #endif
2399
2400 #if WCHAR_T_IS_16
2401
2402 // On Windows, report non-BMP characters as 2 separate surrogates as that
2403 // matches wregex since it is based on wchar_t.
2404 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2405         // These 3 fields determine the iterator position and are used for comparisons
2406         const Document *doc;
2407         Position position;
2408         size_t characterIndex;
2409         // Remaining fields are derived from the determining fields so are excluded in comparisons
2410         unsigned int lenBytes;
2411         size_t lenCharacters;
2412         wchar_t buffered[2];
2413 public:
2414         UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
2415                 doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
2416                 buffered[0] = 0;
2417                 buffered[1] = 0;
2418                 if (doc) {
2419                         ReadCharacter();
2420                 }
2421         }
2422         UTF8Iterator(const UTF8Iterator &other) {
2423                 doc = other.doc;
2424                 position = other.position;
2425                 characterIndex = other.characterIndex;
2426                 lenBytes = other.lenBytes;
2427                 lenCharacters = other.lenCharacters;
2428                 buffered[0] = other.buffered[0];
2429                 buffered[1] = other.buffered[1];
2430         }
2431         UTF8Iterator &operator=(const UTF8Iterator &other) {
2432                 if (this != &other) {
2433                         doc = other.doc;
2434                         position = other.position;
2435                         characterIndex = other.characterIndex;
2436                         lenBytes = other.lenBytes;
2437                         lenCharacters = other.lenCharacters;
2438                         buffered[0] = other.buffered[0];
2439                         buffered[1] = other.buffered[1];
2440                 }
2441                 return *this;
2442         }
2443         wchar_t operator*() const {
2444                 assert(lenCharacters != 0);
2445                 return buffered[characterIndex];
2446         }
2447         UTF8Iterator &operator++() {
2448                 if ((characterIndex + 1) < (lenCharacters)) {
2449                         characterIndex++;
2450                 } else {
2451                         position += lenBytes;
2452                         ReadCharacter();
2453                         characterIndex = 0;
2454                 }
2455                 return *this;
2456         }
2457         UTF8Iterator operator++(int) {
2458                 UTF8Iterator retVal(*this);
2459                 if ((characterIndex + 1) < (lenCharacters)) {
2460                         characterIndex++;
2461                 } else {
2462                         position += lenBytes;
2463                         ReadCharacter();
2464                         characterIndex = 0;
2465                 }
2466                 return retVal;
2467         }
2468         UTF8Iterator &operator--() {
2469                 if (characterIndex) {
2470                         characterIndex--;
2471                 } else {
2472                         position = doc->NextPosition(position, -1);
2473                         ReadCharacter();
2474                         characterIndex = lenCharacters - 1;
2475                 }
2476                 return *this;
2477         }
2478         bool operator==(const UTF8Iterator &other) const {
2479                 // Only test the determining fields, not the character widths and values derived from this
2480                 return doc == other.doc &&
2481                         position == other.position &&
2482                         characterIndex == other.characterIndex;
2483         }
2484         bool operator!=(const UTF8Iterator &other) const {
2485                 // Only test the determining fields, not the character widths and values derived from this
2486                 return doc != other.doc ||
2487                         position != other.position ||
2488                         characterIndex != other.characterIndex;
2489         }
2490         int Pos() const {
2491                 return position;
2492         }
2493         int PosRoundUp() const {
2494                 if (characterIndex)
2495                         return position + lenBytes;     // Force to end of character
2496                 else
2497                         return position;
2498         }
2499 private:
2500         void ReadCharacter() {
2501                 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2502                 lenBytes = charExtracted.widthBytes;
2503                 if (charExtracted.character == unicodeReplacementChar) {
2504                         lenCharacters = 1;
2505                         buffered[0] = static_cast<wchar_t>(charExtracted.character);
2506                 } else {
2507                         lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
2508                 }
2509         }
2510 };
2511
2512 #else
2513
2514 // On Unix, report non-BMP characters as single characters
2515
2516 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2517         const Document *doc;
2518         Position position;
2519 public:
2520         UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2521         }
2522         UTF8Iterator(const UTF8Iterator &other) {
2523                 doc = other.doc;
2524                 position = other.position;
2525         }
2526         UTF8Iterator &operator=(const UTF8Iterator &other) {
2527                 if (this != &other) {
2528                         doc = other.doc;
2529                         position = other.position;
2530                 }
2531                 return *this;
2532         }
2533         wchar_t operator*() const {
2534                 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2535                 return charExtracted.character;
2536         }
2537         UTF8Iterator &operator++() {
2538                 position = doc->NextPosition(position, 1);
2539                 return *this;
2540         }
2541         UTF8Iterator operator++(int) {
2542                 UTF8Iterator retVal(*this);
2543                 position = doc->NextPosition(position, 1);
2544                 return retVal;
2545         }
2546         UTF8Iterator &operator--() {
2547                 position = doc->NextPosition(position, -1);
2548                 return *this;
2549         }
2550         bool operator==(const UTF8Iterator &other) const {
2551                 return doc == other.doc && position == other.position;
2552         }
2553         bool operator!=(const UTF8Iterator &other) const {
2554                 return doc != other.doc || position != other.position;
2555         }
2556         int Pos() const {
2557                 return position;
2558         }
2559         int PosRoundUp() const {
2560                 return position;
2561         }
2562 };
2563
2564 #endif
2565
2566 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2567         std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2568         if (!doc->IsLineStartPosition(startPos))
2569                 flagsMatch |= std::regex_constants::match_not_bol;
2570         if (!doc->IsLineEndPosition(endPos))
2571                 flagsMatch |= std::regex_constants::match_not_eol;
2572         return flagsMatch;
2573 }
2574
2575 template<typename Iterator, typename Regex>
2576 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2577         bool matched = false;
2578         std::match_results<Iterator> match;
2579
2580         // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2581         // If they didn't then the line by line iteration could be removed for the forwards
2582         // case and replaced with the following 4 lines:
2583         //      Iterator uiStart(doc, startPos);
2584         //      Iterator uiEnd(doc, endPos);
2585         //      flagsMatch = MatchFlags(doc, startPos, endPos);
2586         //      matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2587
2588         // Line by line.
2589         for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2590                 const Range lineRange = resr.LineRange(line);
2591                 Iterator itStart(doc, lineRange.start);
2592                 Iterator itEnd(doc, lineRange.end);
2593                 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2594                 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2595                 // Check for the last match on this line.
2596                 if (matched) {
2597                         if (resr.increment == -1) {
2598                                 while (matched) {
2599                                         Iterator itNext(doc, match[0].second.PosRoundUp());
2600                                         flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2601                                         std::match_results<Iterator> matchNext;
2602                                         matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2603                                         if (matched) {
2604                                                 if (match[0].first == match[0].second) {
2605                                                         // Empty match means failure so exit
2606                                                         return false;
2607                                                 }
2608                                                 match = matchNext;
2609                                         }
2610                                 }
2611                                 matched = true;
2612                         }
2613                         break;
2614                 }
2615         }
2616         if (matched) {
2617                 for (size_t co = 0; co < match.size(); co++) {
2618                         search.bopat[co] = match[co].first.Pos();
2619                         search.eopat[co] = match[co].second.PosRoundUp();
2620                         size_t lenMatch = search.eopat[co] - search.bopat[co];
2621                         search.pat[co].resize(lenMatch);
2622                         for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2623                                 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2624                         }
2625                 }
2626         }
2627         return matched;
2628 }
2629
2630 long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
2631         bool caseSensitive, int *length, RESearch &search) {
2632         const RESearchRange resr(doc, minPos, maxPos);
2633         try {
2634                 //ElapsedTime et;
2635                 std::regex::flag_type flagsRe = std::regex::ECMAScript;
2636                 // Flags that apper to have no effect:
2637                 // | std::regex::collate | std::regex::extended;
2638                 if (!caseSensitive)
2639                         flagsRe = flagsRe | std::regex::icase;
2640
2641                 // Clear the RESearch so can fill in matches
2642                 search.Clear();
2643
2644                 bool matched = false;
2645                 if (SC_CP_UTF8 == doc->dbcsCodePage) {
2646                         unsigned int lenS = static_cast<unsigned int>(strlen(s));
2647                         std::vector<wchar_t> ws(lenS + 1);
2648 #if WCHAR_T_IS_16
2649                         size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
2650 #else
2651                         size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
2652 #endif
2653                         ws[outLen] = 0;
2654                         std::wregex regexp;
2655 #if defined(__APPLE__)
2656                         // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
2657                         // is one byte not one character.
2658                         // However, on OS X this makes wregex act as Unicode
2659                         std::locale localeU("en_US.UTF-8");
2660                         regexp.imbue(localeU);
2661 #endif
2662                         regexp.assign(&ws[0], flagsRe);
2663                         matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);
2664
2665                 } else {
2666                         std::regex regexp;
2667                         regexp.assign(s, flagsRe);
2668                         matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
2669                 }
2670
2671                 int posMatch = -1;
2672                 if (matched) {
2673                         posMatch = search.bopat[0];
2674                         *length = search.eopat[0] - search.bopat[0];
2675                 }
2676                 // Example - search in doc/ScintillaHistory.html for
2677                 // [[:upper:]]eta[[:space:]]
2678                 // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
2679                 //double durSearch = et.Duration(true);
2680                 //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
2681                 return posMatch;
2682         } catch (std::regex_error &) {
2683                 // Failed to create regular expression
2684                 throw RegexError();
2685         } catch (...) {
2686                 // Failed in some other way
2687                 return -1;
2688         }
2689 }
2690
2691 #endif
2692
2693 }
2694
2695 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2696                         bool caseSensitive, bool, bool, int flags,
2697                         int *length) {
2698
2699 #ifdef CXX11_REGEX
2700         if (flags & SCFIND_CXX11REGEX) {
2701                         return Cxx11RegexFindText(doc, minPos, maxPos, s,
2702                         caseSensitive, length, search);
2703         }
2704 #endif
2705
2706         const RESearchRange resr(doc, minPos, maxPos);
2707
2708         const bool posix = (flags & SCFIND_POSIX) != 0;
2709
2710         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2711         if (errmsg) {
2712                 return -1;
2713         }
2714         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2715         // Replace first '.' with '-' in each property file variable reference:
2716         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2717         //     Replace: $(\1-\2)
2718         int pos = -1;
2719         int lenRet = 0;
2720         const char searchEnd = s[*length - 1];
2721         const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2722         for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2723                 int startOfLine = doc->LineStart(line);
2724                 int endOfLine = doc->LineEnd(line);
2725                 if (resr.increment == 1) {
2726                         if (line == resr.lineRangeStart) {
2727                                 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2728                                         continue;       // Can't match start of line if start position after start of line
2729                                 startOfLine = resr.startPos;
2730                         }
2731                         if (line == resr.lineRangeEnd) {
2732                                 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2733                                         continue;       // Can't match end of line if end position before end of line
2734                                 endOfLine = resr.endPos;
2735                         }
2736                 } else {
2737                         if (line == resr.lineRangeEnd) {
2738                                 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2739                                         continue;       // Can't match start of line if end position after start of line
2740                                 startOfLine = resr.endPos;
2741                         }
2742                         if (line == resr.lineRangeStart) {
2743                                 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2744                                         continue;       // Can't match end of line if start position before end of line
2745                                 endOfLine = resr.startPos;
2746                         }
2747                 }
2748
2749                 DocumentIndexer di(doc, endOfLine);
2750                 int success = search.Execute(di, startOfLine, endOfLine);
2751                 if (success) {
2752                         pos = search.bopat[0];
2753                         // Ensure only whole characters selected
2754                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2755                         lenRet = search.eopat[0] - search.bopat[0];
2756                         // There can be only one start of a line, so no need to look for last match in line
2757                         if ((resr.increment == -1) && (s[0] != '^')) {
2758                                 // Check for the last match on this line.
2759                                 int repetitions = 1000; // Break out of infinite loop
2760                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2761                                         success = search.Execute(di, pos+1, endOfLine);
2762                                         if (success) {
2763                                                 if (search.eopat[0] <= minPos) {
2764                                                         pos = search.bopat[0];
2765                                                         lenRet = search.eopat[0] - search.bopat[0];
2766                                                 } else {
2767                                                         success = 0;
2768                                                 }
2769                                         }
2770                                 }
2771                         }
2772                         break;
2773                 }
2774         }
2775         *length = lenRet;
2776         return pos;
2777 }
2778
2779 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2780         substituted.clear();
2781         DocumentIndexer di(doc, doc->Length());
2782         search.GrabMatches(di);
2783         for (int j = 0; j < *length; j++) {
2784                 if (text[j] == '\\') {
2785                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2786                                 unsigned int patNum = text[j + 1] - '0';
2787                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2788                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
2789                                         substituted.append(search.pat[patNum].c_str(), len);
2790                                 j++;
2791                         } else {
2792                                 j++;
2793                                 switch (text[j]) {
2794                                 case 'a':
2795                                         substituted.push_back('\a');
2796                                         break;
2797                                 case 'b':
2798                                         substituted.push_back('\b');
2799                                         break;
2800                                 case 'f':
2801                                         substituted.push_back('\f');
2802                                         break;
2803                                 case 'n':
2804                                         substituted.push_back('\n');
2805                                         break;
2806                                 case 'r':
2807                                         substituted.push_back('\r');
2808                                         break;
2809                                 case 't':
2810                                         substituted.push_back('\t');
2811                                         break;
2812                                 case 'v':
2813                                         substituted.push_back('\v');
2814                                         break;
2815                                 case '\\':
2816                                         substituted.push_back('\\');
2817                                         break;
2818                                 default:
2819                                         substituted.push_back('\\');
2820                                         j--;
2821                                 }
2822                         }
2823                 } else {
2824                         substituted.push_back(text[j]);
2825                 }
2826         }
2827         *length = static_cast<int>(substituted.length());
2828         return substituted.c_str();
2829 }
2830
2831 #ifndef SCI_OWNREGEX
2832
2833 #ifdef SCI_NAMESPACE
2834
2835 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2836         return new BuiltinRegex(charClassTable);
2837 }
2838
2839 #else
2840
2841 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2842         return new BuiltinRegex(charClassTable);
2843 }
2844
2845 #endif
2846
2847 #endif