scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <assert.h>
  12 #include <ctype.h>
  13
  14 #include <stdexcept>
  15 #include <string>
  16 #include <vector>
  17 #include <algorithm>
  18
  19 #ifdef CXX11_REGEX
  20 #include <regex>
  21 #endif
  22
  23 #include "Platform.h"
  24
  25 #include "ILexer.h"
  26 #include "Scintilla.h"
  27
  28 #include "CharacterSet.h"
  29 #include "Position.h"
  30 #include "SplitVector.h"
  31 #include "Partitioning.h"
  32 #include "RunStyles.h"
  33 #include "CellBuffer.h"
  34 #include "PerLine.h"
  35 #include "CharClassify.h"
  36 #include "Decoration.h"
  37 #include "CaseFolder.h"
  38 #include "Document.h"
  39 #include "RESearch.h"
  40 #include "UniConversion.h"
  41 #include "UnicodeFromUTF8.h"
  42
  43 #ifdef SCI_NAMESPACE
  44 using namespace Scintilla;
  45 #endif
  46
  47 static inline bool IsPunctuation(char ch) {
  48         return IsASCII(ch) && ispunct(ch);
  49 }
  50
  51 void LexInterface::Colourise(int start, int end) {
  52         if (pdoc && instance && !performingStyle) {
  53                 // Protect against reentrance, which may occur, for example, when
  54                 // fold points are discovered while performing styling and the folding
  55                 // code looks for child lines which may trigger styling.
  56                 performingStyle = true;
  57
  58                 int lengthDoc = pdoc->Length();
  59                 if (end == -1)
  60                         end = lengthDoc;
  61                 int len = end - start;
  62
  63                 PLATFORM_ASSERT(len >= 0);
  64                 PLATFORM_ASSERT(start + len <= lengthDoc);
  65
  66                 int styleStart = 0;
  67                 if (start > 0)
  68                         styleStart = pdoc->StyleAt(start - 1);
  69
  70                 if (len > 0) {
  71                         instance->Lex(start, len, styleStart, pdoc);
  72                         instance->Fold(start, len, styleStart, pdoc);
  73                 }
  74
  75                 performingStyle = false;
  76         }
  77 }
  78
  79 int LexInterface::LineEndTypesSupported() {
  80         if (instance) {
  81                 int interfaceVersion = instance->Version();
  82                 if (interfaceVersion >= lvSubStyles) {
  83                         ILexerWithSubStyles *ssinstance = static_cast<ILexerWithSubStyles *>(instance);
  84                         return ssinstance->LineEndTypesSupported();
  85                 }
  86         }
  87         return 0;
  88 }
  89
  90 Document::Document() {
  91         refCount = 0;
  92         pcf = NULL;
  93 #ifdef _WIN32
  94         eolMode = SC_EOL_CRLF;
  95 #else
  96         eolMode = SC_EOL_LF;
  97 #endif
  98         dbcsCodePage = 0;
  99         lineEndBitSet = SC_LINE_END_TYPE_DEFAULT;
 100         endStyled = 0;
 101         styleClock = 0;
 102         enteredModification = 0;
 103         enteredStyling = 0;
 104         enteredReadOnlyCount = 0;
 105         insertionSet = false;
 106         tabInChars = 8;
 107         indentInChars = 0;
 108         actualIndentInChars = 8;
 109         useTabs = true;
 110         tabIndents = true;
 111         backspaceUnindents = false;
 112
 113         matchesValid = false;
 114         regex = 0;
 115
 116         UTF8BytesOfLeadInitialise();
 117
 118         perLineData[ldMarkers] = new LineMarkers();
 119         perLineData[ldLevels] = new LineLevels();
 120         perLineData[ldState] = new LineState();
 121         perLineData[ldMargin] = new LineAnnotation();
 122         perLineData[ldAnnotation] = new LineAnnotation();
 123
 124         cb.SetPerLine(this);
 125
 126         pli = 0;
 127 }
 128
 129 Document::~Document() {
 130         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 131                 it->watcher->NotifyDeleted(this, it->userData);
 132         }
 133         for (int j=0; j<ldSize; j++) {
 134                 delete perLineData[j];
 135                 perLineData[j] = 0;
 136         }
 137         delete regex;
 138         regex = 0;
 139         delete pli;
 140         pli = 0;
 141         delete pcf;
 142         pcf = 0;
 143 }
 144
 145 void Document::Init() {
 146         for (int j=0; j<ldSize; j++) {
 147                 if (perLineData[j])
 148                         perLineData[j]->Init();
 149         }
 150 }
 151
 152 int Document::LineEndTypesSupported() const {
 153         if ((SC_CP_UTF8 == dbcsCodePage) && pli)
 154                 return pli->LineEndTypesSupported();
 155         else
 156                 return 0;
 157 }
 158
 159 bool Document::SetDBCSCodePage(int dbcsCodePage_) {
 160         if (dbcsCodePage != dbcsCodePage_) {
 161                 dbcsCodePage = dbcsCodePage_;
 162                 SetCaseFolder(NULL);
 163                 cb.SetLineEndTypes(lineEndBitSet & LineEndTypesSupported());
 164                 return true;
 165         } else {
 166                 return false;
 167         }
 168 }
 169
 170 bool Document::SetLineEndTypesAllowed(int lineEndBitSet_) {
 171         if (lineEndBitSet != lineEndBitSet_) {
 172                 lineEndBitSet = lineEndBitSet_;
 173                 int lineEndBitSetActive = lineEndBitSet & LineEndTypesSupported();
 174                 if (lineEndBitSetActive != cb.GetLineEndTypes()) {
 175                         ModifiedAt(0);
 176                         cb.SetLineEndTypes(lineEndBitSetActive);
 177                         return true;
 178                 } else {
 179                         return false;
 180                 }
 181         } else {
 182                 return false;
 183         }
 184 }
 185
 186 void Document::InsertLine(int line) {
 187         for (int j=0; j<ldSize; j++) {
 188                 if (perLineData[j])
 189                         perLineData[j]->InsertLine(line);
 190         }
 191 }
 192
 193 void Document::RemoveLine(int line) {
 194         for (int j=0; j<ldSize; j++) {
 195                 if (perLineData[j])
 196                         perLineData[j]->RemoveLine(line);
 197         }
 198 }
 199
 200 // Increase reference count and return its previous value.
 201 int Document::AddRef() {
 202         return refCount++;
 203 }
 204
 205 // Decrease reference count and return its previous value.
 206 // Delete the document if reference count reaches zero.
 207 int SCI_METHOD Document::Release() {
 208         int curRefCount = --refCount;
 209         if (curRefCount == 0)
 210                 delete this;
 211         return curRefCount;
 212 }
 213
 214 void Document::SetSavePoint() {
 215         cb.SetSavePoint();
 216         NotifySavePoint(true);
 217 }
 218
 219 void Document::TentativeUndo() {
 220         if (!TentativeActive())
 221                 return;
 222         CheckReadOnly();
 223         if (enteredModification == 0) {
 224                 enteredModification++;
 225                 if (!cb.IsReadOnly()) {
 226                         bool startSavePoint = cb.IsSavePoint();
 227                         bool multiLine = false;
 228                         int steps = cb.TentativeSteps();
 229                         //Platform::DebugPrintf("Steps=%d\n", steps);
 230                         for (int step = 0; step < steps; step++) {
 231                                 const int prevLinesTotal = LinesTotal();
 232                                 const Action &action = cb.GetUndoStep();
 233                                 if (action.at == removeAction) {
 234                                         NotifyModified(DocModification(
 235                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
 236                                 } else if (action.at == containerAction) {
 237                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
 238                                         dm.token = action.position;
 239                                         NotifyModified(dm);
 240                                 } else {
 241                                         NotifyModified(DocModification(
 242                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
 243                                 }
 244                                 cb.PerformUndoStep();
 245                                 if (action.at != containerAction) {
 246                                         ModifiedAt(action.position);
 247                                 }
 248
 249                                 int modFlags = SC_PERFORMED_UNDO;
 250                                 // With undo, an insertion action becomes a deletion notification
 251                                 if (action.at == removeAction) {
 252                                         modFlags |= SC_MOD_INSERTTEXT;
 253                                 } else if (action.at == insertAction) {
 254                                         modFlags |= SC_MOD_DELETETEXT;
 255                                 }
 256                                 if (steps > 1)
 257                                         modFlags |= SC_MULTISTEPUNDOREDO;
 258                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 259                                 if (linesAdded != 0)
 260                                         multiLine = true;
 261                                 if (step == steps - 1) {
 262                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 263                                         if (multiLine)
 264                                                 modFlags |= SC_MULTILINEUNDOREDO;
 265                                 }
 266                                 NotifyModified(DocModification(modFlags, action.position, action.lenData,
 267                                                                                            linesAdded, action.data));
 268                         }
 269
 270                         bool endSavePoint = cb.IsSavePoint();
 271                         if (startSavePoint != endSavePoint)
 272                                 NotifySavePoint(endSavePoint);
 273
 274                         cb.TentativeCommit();
 275                 }
 276                 enteredModification--;
 277         }
 278 }
 279
 280 int Document::GetMark(int line) {
 281         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 282 }
 283
 284 int Document::MarkerNext(int lineStart, int mask) const {
 285         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkerNext(lineStart, mask);
 286 }
 287
 288 int Document::AddMark(int line, int markerNum) {
 289         if (line >= 0 && line <= LinesTotal()) {
 290                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 291                         AddMark(line, markerNum, LinesTotal());
 292                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 293                 NotifyModified(mh);
 294                 return prev;
 295         } else {
 296                 return 0;
 297         }
 298 }
 299
 300 void Document::AddMarkSet(int line, int valueSet) {
 301         if (line < 0 || line > LinesTotal()) {
 302                 return;
 303         }
 304         unsigned int m = valueSet;
 305         for (int i = 0; m; i++, m >>= 1)
 306                 if (m & 1)
 307                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 308                                 AddMark(line, i, LinesTotal());
 309         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 310         NotifyModified(mh);
 311 }
 312
 313 void Document::DeleteMark(int line, int markerNum) {
 314         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 315         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 316         NotifyModified(mh);
 317 }
 318
 319 void Document::DeleteMarkFromHandle(int markerHandle) {
 320         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 321         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 322         mh.line = -1;
 323         NotifyModified(mh);
 324 }
 325
 326 void Document::DeleteAllMarks(int markerNum) {
 327         bool someChanges = false;
 328         for (int line = 0; line < LinesTotal(); line++) {
 329                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 330                         someChanges = true;
 331         }
 332         if (someChanges) {
 333                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 334                 mh.line = -1;
 335                 NotifyModified(mh);
 336         }
 337 }
 338
 339 int Document::LineFromHandle(int markerHandle) {
 340         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 341 }
 342
 343 Sci_Position SCI_METHOD Document::LineStart(Sci_Position line) const {
 344         return cb.LineStart(line);
 345 }
 346
 347 bool Document::IsLineStartPosition(int position) const {
 348         return LineStart(LineFromPosition(position)) == position;
 349 }
 350
 351 Sci_Position SCI_METHOD Document::LineEnd(Sci_Position line) const {
 352         if (line >= LinesTotal() - 1) {
 353                 return LineStart(line + 1);
 354         } else {
 355                 int position = LineStart(line + 1);
 356                 if (SC_CP_UTF8 == dbcsCodePage) {
 357                         unsigned char bytes[] = {
 358                                 static_cast<unsigned char>(cb.CharAt(position-3)),
 359                                 static_cast<unsigned char>(cb.CharAt(position-2)),
 360                                 static_cast<unsigned char>(cb.CharAt(position-1)),
 361                         };
 362                         if (UTF8IsSeparator(bytes)) {
 363                                 return position - UTF8SeparatorLength;
 364                         }
 365                         if (UTF8IsNEL(bytes+1)) {
 366                                 return position - UTF8NELLength;
 367                         }
 368                 }
 369                 position--; // Back over CR or LF
 370                 // When line terminator is CR+LF, may need to go back one more
 371                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 372                         position--;
 373                 }
 374                 return position;
 375         }
 376 }
 377
 378 void SCI_METHOD Document::SetErrorStatus(int status) {
 379         // Tell the watchers an error has occurred.
 380         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
 381                 it->watcher->NotifyErrorOccurred(this, it->userData, status);
 382         }
 383 }
 384
 385 Sci_Position SCI_METHOD Document::LineFromPosition(Sci_Position pos) const {
 386         return cb.LineFromPosition(pos);
 387 }
 388
 389 int Document::LineEndPosition(int position) const {
 390         return LineEnd(LineFromPosition(position));
 391 }
 392
 393 bool Document::IsLineEndPosition(int position) const {
 394         return LineEnd(LineFromPosition(position)) == position;
 395 }
 396
 397 bool Document::IsPositionInLineEnd(int position) const {
 398         return position >= LineEnd(LineFromPosition(position));
 399 }
 400
 401 int Document::VCHomePosition(int position) const {
 402         int line = LineFromPosition(position);
 403         int startPosition = LineStart(line);
 404         int endLine = LineEnd(line);
 405         int startText = startPosition;
 406         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 407                 startText++;
 408         if (position == startText)
 409                 return startPosition;
 410         else
 411                 return startText;
 412 }
 413
 414 int SCI_METHOD Document::SetLevel(Sci_Position line, int level) {
 415         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 416         if (prev != level) {
 417                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 418                                    LineStart(line), 0, 0, 0, line);
 419                 mh.foldLevelNow = level;
 420                 mh.foldLevelPrev = prev;
 421                 NotifyModified(mh);
 422         }
 423         return prev;
 424 }
 425
 426 int SCI_METHOD Document::GetLevel(Sci_Position line) const {
 427         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 428 }
 429
 430 void Document::ClearLevels() {
 431         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 432 }
 433
 434 static bool IsSubordinate(int levelStart, int levelTry) {
 435         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 436                 return true;
 437         else
 438                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 439 }
 440
 441 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 442         if (level == -1)
 443                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 444         int maxLine = LinesTotal();
 445         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 446         int lineMaxSubord = lineParent;
 447         while (lineMaxSubord < maxLine - 1) {
 448                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 449                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 450                         break;
 451                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 452                         break;
 453                 lineMaxSubord++;
 454         }
 455         if (lineMaxSubord > lineParent) {
 456                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 457                         // Have chewed up some whitespace that belongs to a parent so seek back
 458                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 459                                 lineMaxSubord--;
 460                         }
 461                 }
 462         }
 463         return lineMaxSubord;
 464 }
 465
 466 int Document::GetFoldParent(int line) const {
 467         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 468         int lineLook = line - 1;
 469         while ((lineLook > 0) && (
 470                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 471                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 472               ) {
 473                 lineLook--;
 474         }
 475         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 476                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 477                 return lineLook;
 478         } else {
 479                 return -1;
 480         }
 481 }
 482
 483 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
 484         int level = GetLevel(line);
 485         int lookLastLine = Platform::Maximum(line, lastLine) + 1;
 486
 487         int lookLine = line;
 488         int lookLineLevel = level;
 489         int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 490         while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
 491                 ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
 492                 lookLineLevel = GetLevel(--lookLine);
 493                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 494         }
 495
 496         int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
 497         if (beginFoldBlock == -1) {
 498                 highlightDelimiter.Clear();
 499                 return;
 500         }
 501
 502         int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
 503         int firstChangeableLineBefore = -1;
 504         if (endFoldBlock < line) {
 505                 lookLine = beginFoldBlock - 1;
 506                 lookLineLevel = GetLevel(lookLine);
 507                 lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 508                 while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
 509                         if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
 510                                 if (GetLastChild(lookLine, -1, lookLastLine) == line) {
 511                                         beginFoldBlock = lookLine;
 512                                         endFoldBlock = line;
 513                                         firstChangeableLineBefore = line - 1;
 514                                 }
 515                         }
 516                         if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
 517                                 break;
 518                         lookLineLevel = GetLevel(--lookLine);
 519                         lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 520                 }
 521         }
 522         if (firstChangeableLineBefore == -1) {
 523                 for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 524                         lookLine >= beginFoldBlock;
 525                         lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 526                         if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
 527                                 firstChangeableLineBefore = lookLine;
 528                                 break;
 529                         }
 530                 }
 531         }
 532         if (firstChangeableLineBefore == -1)
 533                 firstChangeableLineBefore = beginFoldBlock - 1;
 534
 535         int firstChangeableLineAfter = -1;
 536         for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
 537                 lookLine <= endFoldBlock;
 538                 lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
 539                 if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
 540                         firstChangeableLineAfter = lookLine;
 541                         break;
 542                 }
 543         }
 544         if (firstChangeableLineAfter == -1)
 545                 firstChangeableLineAfter = endFoldBlock + 1;
 546
 547         highlightDelimiter.beginFoldBlock = beginFoldBlock;
 548         highlightDelimiter.endFoldBlock = endFoldBlock;
 549         highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
 550         highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
 551 }
 552
 553 int Document::ClampPositionIntoDocument(int pos) const {
 554         return Platform::Clamp(pos, 0, Length());
 555 }
 556
 557 bool Document::IsCrLf(int pos) const {
 558         if (pos < 0)
 559                 return false;
 560         if (pos >= (Length() - 1))
 561                 return false;
 562         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 563 }
 564
 565 int Document::LenChar(int pos) {
 566         if (pos < 0) {
 567                 return 1;
 568         } else if (IsCrLf(pos)) {
 569                 return 2;
 570         } else if (SC_CP_UTF8 == dbcsCodePage) {
 571                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 572                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 573                 int lengthDoc = Length();
 574                 if ((pos + widthCharBytes) > lengthDoc)
 575                         return lengthDoc - pos;
 576                 else
 577                         return widthCharBytes;
 578         } else if (dbcsCodePage) {
 579                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 580         } else {
 581                 return 1;
 582         }
 583 }
 584
 585 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 586         int trail = pos;
 587         while ((trail>0) && (pos-trail < UTF8MaxBytes) && UTF8IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail-1))))
 588                 trail--;
 589         start = (trail > 0) ? trail-1 : trail;
 590
 591         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(start));
 592         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 593         if (widthCharBytes == 1) {
 594                 return false;
 595         } else {
 596                 int trailBytes = widthCharBytes - 1;
 597                 int len = pos - start;
 598                 if (len > trailBytes)
 599                         // pos too far from lead
 600                         return false;
 601                 char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 602                 for (int b=1; b<widthCharBytes && ((start+b) < Length()); b++)
 603                         charBytes[b] = cb.CharAt(static_cast<int>(start+b));
 604                 int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 605                 if (utf8status & UTF8MaskInvalid)
 606                         return false;
 607                 end = start + widthCharBytes;
 608                 return true;
 609         }
 610 }
 611
 612 // Normalise a position so that it is not halfway through a two byte character.
 613 // This can occur in two situations -
 614 // When lines are terminated with \r\n pairs which should be treated as one character.
 615 // When displaying DBCS text such as Japanese.
 616 // If moving, move the position in the indicated direction.
 617 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) const {
 618         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 619         // If out of range, just return minimum/maximum value.
 620         if (pos <= 0)
 621                 return 0;
 622         if (pos >= Length())
 623                 return Length();
 624
 625         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 626         if (checkLineEnd && IsCrLf(pos - 1)) {
 627                 if (moveDir > 0)
 628                         return pos + 1;
 629                 else
 630                         return pos - 1;
 631         }
 632
 633         if (dbcsCodePage) {
 634                 if (SC_CP_UTF8 == dbcsCodePage) {
 635                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 636                         // If ch is not a trail byte then pos is valid intercharacter position
 637                         if (UTF8IsTrailByte(ch)) {
 638                                 int startUTF = pos;
 639                                 int endUTF = pos;
 640                                 if (InGoodUTF8(pos, startUTF, endUTF)) {
 641                                         // ch is a trail byte within a UTF-8 character
 642                                         if (moveDir > 0)
 643                                                 pos = endUTF;
 644                                         else
 645                                                 pos = startUTF;
 646                                 }
 647                                 // Else invalid UTF-8 so return position of isolated trail byte
 648                         }
 649                 } else {
 650                         // Anchor DBCS calculations at start of line because start of line can
 651                         // not be a DBCS trail byte.
 652                         int posStartLine = LineStart(LineFromPosition(pos));
 653                         if (pos == posStartLine)
 654                                 return pos;
 655
 656                         // Step back until a non-lead-byte is found.
 657                         int posCheck = pos;
 658                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 659                                 posCheck--;
 660
 661                         // Check from known start of character.
 662                         while (posCheck < pos) {
 663                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 664                                 if (posCheck + mbsize == pos) {
 665                                         return pos;
 666                                 } else if (posCheck + mbsize > pos) {
 667                                         if (moveDir > 0) {
 668                                                 return posCheck + mbsize;
 669                                         } else {
 670                                                 return posCheck;
 671                                         }
 672                                 }
 673                                 posCheck += mbsize;
 674                         }
 675                 }
 676         }
 677
 678         return pos;
 679 }
 680
 681 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 682 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 683 // A \r\n pair is treated as two characters.
 684 int Document::NextPosition(int pos, int moveDir) const {
 685         // If out of range, just return minimum/maximum value.
 686         int increment = (moveDir > 0) ? 1 : -1;
 687         if (pos + increment <= 0)
 688                 return 0;
 689         if (pos + increment >= Length())
 690                 return Length();
 691
 692         if (dbcsCodePage) {
 693                 if (SC_CP_UTF8 == dbcsCodePage) {
 694                         if (increment == 1) {
 695                                 // Simple forward movement case so can avoid some checks
 696                                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(pos));
 697                                 if (UTF8IsAscii(leadByte)) {
 698                                         // Single byte character or invalid
 699                                         pos++;
 700                                 } else {
 701                                         const int widthCharBytes = UTF8BytesOfLead[leadByte];
 702                                         char charBytes[UTF8MaxBytes] = {static_cast<char>(leadByte),0,0,0};
 703                                         for (int b=1; b<widthCharBytes; b++)
 704                                                 charBytes[b] = cb.CharAt(static_cast<int>(pos+b));
 705                                         int utf8status = UTF8Classify(reinterpret_cast<const unsigned char *>(charBytes), widthCharBytes);
 706                                         if (utf8status & UTF8MaskInvalid)
 707                                                 pos++;
 708                                         else
 709                                                 pos += utf8status & UTF8MaskWidth;
 710                                 }
 711                         } else {
 712                                 // Examine byte before position
 713                                 pos--;
 714                                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 715                                 // If ch is not a trail byte then pos is valid intercharacter position
 716                                 if (UTF8IsTrailByte(ch)) {
 717                                         // If ch is a trail byte in a valid UTF-8 character then return start of character
 718                                         int startUTF = pos;
 719                                         int endUTF = pos;
 720                                         if (InGoodUTF8(pos, startUTF, endUTF)) {
 721                                                 pos = startUTF;
 722                                         }
 723                                         // Else invalid UTF-8 so return position of isolated trail byte
 724                                 }
 725                         }
 726                 } else {
 727                         if (moveDir > 0) {
 728                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 729                                 pos += mbsize;
 730                                 if (pos > Length())
 731                                         pos = Length();
 732                         } else {
 733                                 // Anchor DBCS calculations at start of line because start of line can
 734                                 // not be a DBCS trail byte.
 735                                 int posStartLine = LineStart(LineFromPosition(pos));
 736                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 737                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 738                                 if ((pos - 1) <= posStartLine) {
 739                                         return pos - 1;
 740                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 741                                         // Must actually be trail byte
 742                                         return pos - 2;
 743                                 } else {
 744                                         // Otherwise, step back until a non-lead-byte is found.
 745                                         int posTemp = pos - 1;
 746                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 747                                                 ;
 748                                         // Now posTemp+1 must point to the beginning of a character,
 749                                         // so figure out whether we went back an even or an odd
 750                                         // number of bytes and go back 1 or 2 bytes, respectively.
 751                                         return (pos - 1 - ((pos - posTemp) & 1));
 752                                 }
 753                         }
 754                 }
 755         } else {
 756                 pos += increment;
 757         }
 758
 759         return pos;
 760 }
 761
 762 bool Document::NextCharacter(int &pos, int moveDir) const {
 763         // Returns true if pos changed
 764         int posNext = NextPosition(pos, moveDir);
 765         if (posNext == pos) {
 766                 return false;
 767         } else {
 768                 pos = posNext;
 769                 return true;
 770         }
 771 }
 772
 773 // Return -1  on out-of-bounds
 774 Sci_Position SCI_METHOD Document::GetRelativePosition(Sci_Position positionStart, Sci_Position characterOffset) const {
 775         int pos = positionStart;
 776         if (dbcsCodePage) {
 777                 const int increment = (characterOffset > 0) ? 1 : -1;
 778                 while (characterOffset != 0) {
 779                         const int posNext = NextPosition(pos, increment);
 780                         if (posNext == pos)
 781                                 return INVALID_POSITION;
 782                         pos = posNext;
 783                         characterOffset -= increment;
 784                 }
 785         } else {
 786                 pos = positionStart + characterOffset;
 787                 if ((pos < 0) || (pos > Length()))
 788                         return INVALID_POSITION;
 789         }
 790         return pos;
 791 }
 792
 793 int Document::GetRelativePositionUTF16(int positionStart, int characterOffset) const {
 794         int pos = positionStart;
 795         if (dbcsCodePage) {
 796                 const int increment = (characterOffset > 0) ? 1 : -1;
 797                 while (characterOffset != 0) {
 798                         const int posNext = NextPosition(pos, increment);
 799                         if (posNext == pos)
 800                                 return INVALID_POSITION;
 801                         if (abs(pos-posNext) > 3)       // 4 byte character = 2*UTF16.
 802                                 characterOffset -= increment;
 803                         pos = posNext;
 804                         characterOffset -= increment;
 805                 }
 806         } else {
 807                 pos = positionStart + characterOffset;
 808                 if ((pos < 0) || (pos > Length()))
 809                         return INVALID_POSITION;
 810         }
 811         return pos;
 812 }
 813
 814 int SCI_METHOD Document::GetCharacterAndWidth(Sci_Position position, Sci_Position *pWidth) const {
 815         int character;
 816         int bytesInCharacter = 1;
 817         if (dbcsCodePage) {
 818                 const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
 819                 if (SC_CP_UTF8 == dbcsCodePage) {
 820                         if (UTF8IsAscii(leadByte)) {
 821                                 // Single byte character or invalid
 822                                 character =  leadByte;
 823                         } else {
 824                                 const int widthCharBytes = UTF8BytesOfLead[leadByte];
 825                                 unsigned char charBytes[UTF8MaxBytes] = {leadByte,0,0,0};
 826                                 for (int b=1; b<widthCharBytes; b++)
 827                                         charBytes[b] = static_cast<unsigned char>(cb.CharAt(position+b));
 828                                 int utf8status = UTF8Classify(charBytes, widthCharBytes);
 829                                 if (utf8status & UTF8MaskInvalid) {
 830                                         // Report as singleton surrogate values which are invalid Unicode
 831                                         character =  0xDC80 + leadByte;
 832                                 } else {
 833                                         bytesInCharacter = utf8status & UTF8MaskWidth;
 834                                         character = UnicodeFromUTF8(charBytes);
 835                                 }
 836                         }
 837                 } else {
 838                         if (IsDBCSLeadByte(leadByte)) {
 839                                 bytesInCharacter = 2;
 840                                 character = (leadByte << 8) | static_cast<unsigned char>(cb.CharAt(position+1));
 841                         } else {
 842                                 character = leadByte;
 843                         }
 844                 }
 845         } else {
 846                 character = cb.CharAt(position);
 847         }
 848         if (pWidth) {
 849                 *pWidth = bytesInCharacter;
 850         }
 851         return character;
 852 }
 853
// Return the code page setting of the document (dbcsCodePage).
// Elsewhere in this file 0 is treated as a single byte encoding, SC_CP_UTF8 as UTF-8
// and any other value as a double byte (DBCS) code page.
int SCI_METHOD Document::CodePage() const {
        return dbcsCodePage;
}
 857
 858 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 859         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 860         unsigned char uch = static_cast<unsigned char>(ch);
 861         switch (dbcsCodePage) {
 862                 case 932:
 863                         // Shift_jis
 864                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 865                                 ((uch >= 0xE0) && (uch <= 0xFC));
 866                                 // Lead bytes F0 to FC may be a Microsoft addition.
 867                 case 936:
 868                         // GBK
 869                         return (uch >= 0x81) && (uch <= 0xFE);
 870                 case 949:
 871                         // Korean Wansung KS C-5601-1987
 872                         return (uch >= 0x81) && (uch <= 0xFE);
 873                 case 950:
 874                         // Big5
 875                         return (uch >= 0x81) && (uch <= 0xFE);
 876                 case 1361:
 877                         // Korean Johab KS C-5601-1992
 878                         return
 879                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 880                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 881                                 ((uch >= 0xE0) && (uch <= 0xF9));
 882         }
 883         return false;
 884 }
 885
 886 static inline bool IsSpaceOrTab(int ch) {
 887         return ch == ' ' || ch == '\t';
 888 }
 889
 890 // Need to break text into segments near lengthSegment but taking into
 891 // account the encoding to not break inside a UTF-8 or DBCS character
 892 // and also trying to avoid breaking inside a pair of combining characters.
 893 // The segment length must always be long enough (more than 4 bytes)
 894 // so that there will be at least one whole character to make a segment.
 895 // For UTF-8, text must consist only of valid whole characters.
 896 // In preference order from best to worst:
 897 //   1) Break after space
 898 //   2) Break before punctuation
 899 //   3) Break after whole character
 900
 901 int Document::SafeSegment(const char *text, int length, int lengthSegment) const {
 902         if (length <= lengthSegment)
 903                 return length;
 904         int lastSpaceBreak = -1;
 905         int lastPunctuationBreak = -1;
 906         int lastEncodingAllowedBreak = 0;
 907         for (int j=0; j < lengthSegment;) {
 908                 unsigned char ch = static_cast<unsigned char>(text[j]);
 909                 if (j > 0) {
 910                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 911                                 lastSpaceBreak = j;
 912                         }
 913                         if (ch < 'A') {
 914                                 lastPunctuationBreak = j;
 915                         }
 916                 }
 917                 lastEncodingAllowedBreak = j;
 918
 919                 if (dbcsCodePage == SC_CP_UTF8) {
 920                         j += UTF8BytesOfLead[ch];
 921                 } else if (dbcsCodePage) {
 922                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 923                 } else {
 924                         j++;
 925                 }
 926         }
 927         if (lastSpaceBreak >= 0) {
 928                 return lastSpaceBreak;
 929         } else if (lastPunctuationBreak >= 0) {
 930                 return lastPunctuationBreak;
 931         }
 932         return lastEncodingAllowedBreak;
 933 }
 934
 935 EncodingFamily Document::CodePageFamily() const {
 936         if (SC_CP_UTF8 == dbcsCodePage)
 937                 return efUnicode;
 938         else if (dbcsCodePage)
 939                 return efDBCS;
 940         else
 941                 return efEightBit;
 942 }
 943
 944 void Document::ModifiedAt(int pos) {
 945         if (endStyled > pos)
 946                 endStyled = pos;
 947 }
 948
 949 void Document::CheckReadOnly() {
 950         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 951                 enteredReadOnlyCount++;
 952                 NotifyModifyAttempt();
 953                 enteredReadOnlyCount--;
 954         }
 955 }
 956
 957 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 958 // SetStyleAt does not change the persistent state of a document
 959
 960 bool Document::DeleteChars(int pos, int len) {
 961         if (pos < 0)
 962                 return false;
 963         if (len <= 0)
 964                 return false;
 965         if ((pos + len) > Length())
 966                 return false;
 967         CheckReadOnly();
 968         if (enteredModification != 0) {
 969                 return false;
 970         } else {
 971                 enteredModification++;
 972                 if (!cb.IsReadOnly()) {
 973                         NotifyModified(
 974                             DocModification(
 975                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 976                                 pos, len,
 977                                 0, 0));
 978                         int prevLinesTotal = LinesTotal();
 979                         bool startSavePoint = cb.IsSavePoint();
 980                         bool startSequence = false;
 981                         const char *text = cb.DeleteChars(pos, len, startSequence);
 982                         if (startSavePoint && cb.IsCollectingUndo())
 983                                 NotifySavePoint(!startSavePoint);
 984                         if ((pos < Length()) || (pos == 0))
 985                                 ModifiedAt(pos);
 986                         else
 987                                 ModifiedAt(pos-1);
 988                         NotifyModified(
 989                             DocModification(
 990                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 991                                 pos, len,
 992                                 LinesTotal() - prevLinesTotal, text));
 993                 }
 994                 enteredModification--;
 995         }
 996         return !cb.IsReadOnly();
 997 }
 998
 999 /**
1000  * Insert a string with a length.
1001  */
1002 int Document::InsertString(int position, const char *s, int insertLength) {
1003         if (insertLength <= 0) {
1004                 return 0;
1005         }
1006         CheckReadOnly();        // Application may change read only state here
1007         if (cb.IsReadOnly()) {
1008                 return 0;
1009         }
1010         if (enteredModification != 0) {
1011                 return 0;
1012         }
1013         enteredModification++;
1014         insertionSet = false;
1015         insertion.clear();
1016         NotifyModified(
1017                 DocModification(
1018                         SC_MOD_INSERTCHECK,
1019                         position, insertLength,
1020                         0, s));
1021         if (insertionSet) {
1022                 s = insertion.c_str();
1023                 insertLength = static_cast<int>(insertion.length());
1024         }
1025         NotifyModified(
1026                 DocModification(
1027                         SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
1028                         position, insertLength,
1029                         0, s));
1030         int prevLinesTotal = LinesTotal();
1031         bool startSavePoint = cb.IsSavePoint();
1032         bool startSequence = false;
1033         const char *text = cb.InsertString(position, s, insertLength, startSequence);
1034         if (startSavePoint && cb.IsCollectingUndo())
1035                 NotifySavePoint(!startSavePoint);
1036         ModifiedAt(position);
1037         NotifyModified(
1038                 DocModification(
1039                         SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
1040                         position, insertLength,
1041                         LinesTotal() - prevLinesTotal, text));
1042         if (insertionSet) {     // Free memory as could be large
1043                 std::string().swap(insertion);
1044         }
1045         enteredModification--;
1046         return insertLength;
1047 }
1048
1049 void Document::ChangeInsertion(const char *s, int length) {
1050         insertionSet = true;
1051         insertion.assign(s, length);
1052 }
1053
1054 int SCI_METHOD Document::AddData(char *data, Sci_Position length) {
1055         try {
1056                 int position = Length();
1057                 InsertString(position, data, length);
1058         } catch (std::bad_alloc &) {
1059                 return SC_STATUS_BADALLOC;
1060         } catch (...) {
1061                 return SC_STATUS_FAILURE;
1062         }
1063         return 0;
1064 }
1065
// Return this Document object as an untyped pointer.
void * SCI_METHOD Document::ConvertToDocument() {
        return this;
}
1069
// Undo the steps reported by cb.StartUndo().
// Nothing is undone unless no other modification is in progress (enteredModification
// is 0), undo collection is on and the buffer is not read-only.
// Every step is announced before it is performed and reported again afterwards,
// with SC_PERFORMED_UNDO set in both notifications.
// Returns a position derived from the last text-changing step: the end of the text that
// was put back (taking a run of adjacent re-insertions into account) or the position
// where text was taken out. Returns -1 when no text-changing step was performed.
int Document::Undo() {
        int newPos = -1;
        CheckReadOnly();
        if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        // Set once any step changes the number of lines
                        bool multiLine = false;
                        int steps = cb.StartUndo();
                        //Platform::DebugPrintf("Steps=%d\n", steps);
                        // Track a run of undone removals at adjacent positions so that
                        // newPos can be placed at the end of the whole run.
                        int coalescedRemovePos = -1;
                        int coalescedRemoveLen = 0;
                        int prevRemoveActionPos = -1;
                        int prevRemoveActionLen = 0;
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetUndoStep();
                                // Announce the change that is about to happen. Undoing a removal
                                // puts text back so it is announced as an insertion.
                                if (action.at == removeAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
                                } else if (action.at == containerAction) {
                                        // Container actions carry a token in the position field
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                        // A container action that may not coalesce ends any run of removals
                                        if (!action.mayCoalesce) {
                                                coalescedRemovePos = -1;
                                                coalescedRemoveLen = 0;
                                                prevRemoveActionPos = -1;
                                                prevRemoveActionLen = 0;
                                        }
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
                                }
                                cb.PerformUndoStep();
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                        newPos = action.position;
                                }

                                int modFlags = SC_PERFORMED_UNDO;
                                // With undo, a removal action becomes an insertion notification
                                // and an insertion action becomes a deletion notification
                                if (action.at == removeAction) {
                                        // Text was put back: position after it
                                        newPos += action.lenData;
                                        modFlags |= SC_MOD_INSERTTEXT;
                                        // Extend the run when this removal is at the same position as the
                                        // previous one or directly follows it, otherwise start a new run.
                                        if ((coalescedRemoveLen > 0) &&
                                                (action.position == prevRemoveActionPos || action.position == (prevRemoveActionPos + prevRemoveActionLen))) {
                                                coalescedRemoveLen += action.lenData;
                                                newPos = coalescedRemovePos + coalescedRemoveLen;
                                        } else {
                                                coalescedRemovePos = action.position;
                                                coalescedRemoveLen = action.lenData;
                                        }
                                        prevRemoveActionPos = action.position;
                                        prevRemoveActionLen = action.lenData;
                                } else if (action.at == insertAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                        // An undone insertion ends any run of removals
                                        coalescedRemovePos = -1;
                                        coalescedRemoveLen = 0;
                                        prevRemoveActionPos = -1;
                                        prevRemoveActionLen = 0;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step also reports whether any step changed the line count
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(DocModification(modFlags, action.position, action.lenData,
                                                                                           linesAdded, action.data));
                        }

                        // Tell watchers if the save point state changed as a result of the undo
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);
                }
                enteredModification--;
        }
        return newPos;
}
1154
// Redo the steps reported by cb.StartRedo().
// Nothing is redone unless no other modification is in progress (enteredModification
// is 0), undo collection is on and the buffer is not read-only.
// Every step is announced before it is performed and reported again afterwards,
// with SC_PERFORMED_REDO set in both notifications.
// Returns a position derived from the last text-changing step: the end of the inserted
// text for an insertion or the position of the removal for a deletion.
// Returns -1 when no text-changing step was performed.
int Document::Redo() {
        int newPos = -1;
        CheckReadOnly();
        if ((enteredModification == 0) && (cb.IsCollectingUndo())) {
                enteredModification++;
                if (!cb.IsReadOnly()) {
                        bool startSavePoint = cb.IsSavePoint();
                        // Set once any step changes the number of lines
                        bool multiLine = false;
                        int steps = cb.StartRedo();
                        for (int step = 0; step < steps; step++) {
                                const int prevLinesTotal = LinesTotal();
                                const Action &action = cb.GetRedoStep();
                                // Announce the change that is about to happen
                                if (action.at == insertAction) {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
                                } else if (action.at == containerAction) {
                                        // Container actions carry a token in the position field
                                        DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
                                        dm.token = action.position;
                                        NotifyModified(dm);
                                } else {
                                        NotifyModified(DocModification(
                                                                        SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
                                }
                                cb.PerformRedoStep();
                                if (action.at != containerAction) {
                                        ModifiedAt(action.position);
                                        newPos = action.position;
                                }

                                int modFlags = SC_PERFORMED_REDO;
                                if (action.at == insertAction) {
                                        // Text was inserted again: position after it
                                        newPos += action.lenData;
                                        modFlags |= SC_MOD_INSERTTEXT;
                                } else if (action.at == removeAction) {
                                        modFlags |= SC_MOD_DELETETEXT;
                                }
                                if (steps > 1)
                                        modFlags |= SC_MULTISTEPUNDOREDO;
                                const int linesAdded = LinesTotal() - prevLinesTotal;
                                if (linesAdded != 0)
                                        multiLine = true;
                                // The final step also reports whether any step changed the line count
                                if (step == steps - 1) {
                                        modFlags |= SC_LASTSTEPINUNDOREDO;
                                        if (multiLine)
                                                modFlags |= SC_MULTILINEUNDOREDO;
                                }
                                NotifyModified(
                                        DocModification(modFlags, action.position, action.lenData,
                                                                        linesAdded, action.data));
                        }

                        // Tell watchers if the save point state changed as a result of the redo
                        bool endSavePoint = cb.IsSavePoint();
                        if (startSavePoint != endSavePoint)
                                NotifySavePoint(endSavePoint);
                }
                enteredModification--;
        }
        return newPos;
}
1214
1215 void Document::DelChar(int pos) {
1216         DeleteChars(pos, LenChar(pos));
1217 }
1218
1219 void Document::DelCharBack(int pos) {
1220         if (pos <= 0) {
1221                 return;
1222         } else if (IsCrLf(pos - 2)) {
1223                 DeleteChars(pos - 2, 2);
1224         } else if (dbcsCodePage) {
1225                 int startChar = NextPosition(pos, -1);
1226                 DeleteChars(startChar, pos - startChar);
1227         } else {
1228                 DeleteChars(pos - 1, 1);
1229         }
1230 }
1231
// Return the first multiple of tabSize that is strictly greater than pos
// (for pos >= 0), that is the column a tab typed at pos advances to.
static int NextTab(int pos, int tabSize) {
        const int offsetInTab = pos % tabSize;
        return pos - offsetInTab + tabSize;
}
1235
// Build the white space needed to indent a line by indent columns.
// When insertSpaces is false as many tabs as fit (one per tabSize columns) are used
// and any remainder is made up with spaces; otherwise only spaces are used.
// A non-positive indent gives an empty string. A non-positive tabSize can not be
// honoured (it previously made the tab loop run for ever) so spaces are used instead.
static std::string CreateIndentation(int indent, int tabSize, bool insertSpaces) {
        std::string indentation;
        if (indent <= 0)
                return indentation;
        if (!insertSpaces && (tabSize > 0)) {
                indentation.assign(static_cast<std::string::size_type>(indent / tabSize), '\t');
                indent %= tabSize;
        }
        indentation.append(static_cast<std::string::size_type>(indent), ' ');
        return indentation;
}
1250
1251 int SCI_METHOD Document::GetLineIndentation(Sci_Position line) {
1252         int indent = 0;
1253         if ((line >= 0) && (line < LinesTotal())) {
1254                 int lineStart = LineStart(line);
1255                 int length = Length();
1256                 for (int i = lineStart; i < length; i++) {
1257                         char ch = cb.CharAt(i);
1258                         if (ch == ' ')
1259                                 indent++;
1260                         else if (ch == '\t')
1261                                 indent = NextTab(indent, tabInChars);
1262                         else
1263                                 return indent;
1264                 }
1265         }
1266         return indent;
1267 }
1268
1269 int Document::SetLineIndentation(int line, int indent) {
1270         int indentOfLine = GetLineIndentation(line);
1271         if (indent < 0)
1272                 indent = 0;
1273         if (indent != indentOfLine) {
1274                 std::string linebuf = CreateIndentation(indent, tabInChars, !useTabs);
1275                 int thisLineStart = LineStart(line);
1276                 int indentPos = GetLineIndentPosition(line);
1277                 UndoGroup ug(this);
1278                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1279                 return thisLineStart + InsertString(thisLineStart, linebuf.c_str(),
1280                         static_cast<int>(linebuf.length()));
1281         } else {
1282                 return GetLineIndentPosition(line);
1283         }
1284 }
1285
1286 int Document::GetLineIndentPosition(int line) const {
1287         if (line < 0)
1288                 return 0;
1289         int pos = LineStart(line);
1290         int length = Length();
1291         while ((pos < length) && IsSpaceOrTab(cb.CharAt(pos))) {
1292                 pos++;
1293         }
1294         return pos;
1295 }
1296
1297 int Document::GetColumn(int pos) {
1298         int column = 0;
1299         int line = LineFromPosition(pos);
1300         if ((line >= 0) && (line < LinesTotal())) {
1301                 for (int i = LineStart(line); i < pos;) {
1302                         char ch = cb.CharAt(i);
1303                         if (ch == '\t') {
1304                                 column = NextTab(column, tabInChars);
1305                                 i++;
1306                         } else if (ch == '\r') {
1307                                 return column;
1308                         } else if (ch == '\n') {
1309                                 return column;
1310                         } else if (i >= Length()) {
1311                                 return column;
1312                         } else {
1313                                 column++;
1314                                 i = NextPosition(i, 1);
1315                         }
1316                 }
1317         }
1318         return column;
1319 }
1320
1321 int Document::CountCharacters(int startPos, int endPos) const {
1322         startPos = MovePositionOutsideChar(startPos, 1, false);
1323         endPos = MovePositionOutsideChar(endPos, -1, false);
1324         int count = 0;
1325         int i = startPos;
1326         while (i < endPos) {
1327                 count++;
1328                 i = NextPosition(i, 1);
1329         }
1330         return count;
1331 }
1332
1333 int Document::CountUTF16(int startPos, int endPos) const {
1334         startPos = MovePositionOutsideChar(startPos, 1, false);
1335         endPos = MovePositionOutsideChar(endPos, -1, false);
1336         int count = 0;
1337         int i = startPos;
1338         while (i < endPos) {
1339                 count++;
1340                 const int next = NextPosition(i, 1);
1341                 if ((next - i) > 3)
1342                         count++;
1343                 i = next;
1344         }
1345         return count;
1346 }
1347
1348 int Document::FindColumn(int line, int column) {
1349         int position = LineStart(line);
1350         if ((line >= 0) && (line < LinesTotal())) {
1351                 int columnCurrent = 0;
1352                 while ((columnCurrent < column) && (position < Length())) {
1353                         char ch = cb.CharAt(position);
1354                         if (ch == '\t') {
1355                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1356                                 if (columnCurrent > column)
1357                                         return position;
1358                                 position++;
1359                         } else if (ch == '\r') {
1360                                 return position;
1361                         } else if (ch == '\n') {
1362                                 return position;
1363                         } else {
1364                                 columnCurrent++;
1365                                 position = NextPosition(position, 1);
1366                         }
1367                 }
1368         }
1369         return position;
1370 }
1371
1372 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1373         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1374         for (int line = lineBottom; line >= lineTop; line--) {
1375                 int indentOfLine = GetLineIndentation(line);
1376                 if (forwards) {
1377                         if (LineStart(line) < LineEnd(line)) {
1378                                 SetLineIndentation(line, indentOfLine + IndentSize());
1379                         }
1380                 } else {
1381                         SetLineIndentation(line, indentOfLine - IndentSize());
1382                 }
1383         }
1384 }
1385
1386 // Convert line endings for a piece of text to a particular mode.
1387 // Stop at len or when a NUL is found.
1388 std::string Document::TransformLineEnds(const char *s, size_t len, int eolModeWanted) {
1389         std::string dest;
1390         for (size_t i = 0; (i < len) && (s[i]); i++) {
1391                 if (s[i] == '\n' || s[i] == '\r') {
1392                         if (eolModeWanted == SC_EOL_CR) {
1393                                 dest.push_back('\r');
1394                         } else if (eolModeWanted == SC_EOL_LF) {
1395                                 dest.push_back('\n');
1396                         } else { // eolModeWanted == SC_EOL_CRLF
1397                                 dest.push_back('\r');
1398                                 dest.push_back('\n');
1399                         }
1400                         if ((s[i] == '\r') && (i+1 < len) && (s[i+1] == '\n')) {
1401                                 i++;
1402                         }
1403                 } else {
1404                         dest.push_back(s[i]);
1405                 }
1406         }
1407         return dest;
1408 }
1409
1410 void Document::ConvertLineEnds(int eolModeSet) {
1411         UndoGroup ug(this);
1412
1413         for (int pos = 0; pos < Length(); pos++) {
1414                 if (cb.CharAt(pos) == '\r') {
1415                         if (cb.CharAt(pos + 1) == '\n') {
1416                                 // CRLF
1417                                 if (eolModeSet == SC_EOL_CR) {
1418                                         DeleteChars(pos + 1, 1); // Delete the LF
1419                                 } else if (eolModeSet == SC_EOL_LF) {
1420                                         DeleteChars(pos, 1); // Delete the CR
1421                                 } else {
1422                                         pos++;
1423                                 }
1424                         } else {
1425                                 // CR
1426                                 if (eolModeSet == SC_EOL_CRLF) {
1427                                         pos += InsertString(pos + 1, "\n", 1); // Insert LF
1428                                 } else if (eolModeSet == SC_EOL_LF) {
1429                                         pos += InsertString(pos, "\n", 1); // Insert LF
1430                                         DeleteChars(pos, 1); // Delete CR
1431                                         pos--;
1432                                 }
1433                         }
1434                 } else if (cb.CharAt(pos) == '\n') {
1435                         // LF
1436                         if (eolModeSet == SC_EOL_CRLF) {
1437                                 pos += InsertString(pos, "\r", 1); // Insert CR
1438                         } else if (eolModeSet == SC_EOL_CR) {
1439                                 pos += InsertString(pos, "\r", 1); // Insert CR
1440                                 DeleteChars(pos, 1); // Delete LF
1441                                 pos--;
1442                         }
1443                 }
1444         }
1445
1446 }
1447
1448 bool Document::IsWhiteLine(int line) const {
1449         int currentChar = LineStart(line);
1450         int endLine = LineEnd(line);
1451         while (currentChar < endLine) {
1452                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1453                         return false;
1454                 }
1455                 ++currentChar;
1456         }
1457         return true;
1458 }
1459
1460 int Document::ParaUp(int pos) const {
1461         int line = LineFromPosition(pos);
1462         line--;
1463         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1464                 line--;
1465         }
1466         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1467                 line--;
1468         }
1469         line++;
1470         return LineStart(line);
1471 }
1472
1473 int Document::ParaDown(int pos) const {
1474         int line = LineFromPosition(pos);
1475         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1476                 line++;
1477         }
1478         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1479                 line++;
1480         }
1481         if (line < LinesTotal())
1482                 return LineStart(line);
1483         else // end of a document
1484                 return LineEnd(line-1);
1485 }
1486
1487 CharClassify::cc Document::WordCharClass(unsigned char ch) const {
1488         if ((SC_CP_UTF8 == dbcsCodePage) && (!UTF8IsAscii(ch)))
1489                 return CharClassify::ccWord;
1490         return charClass.GetClass(ch);
1491 }
1492
1493 /**
1494  * Used by commmands that want to select whole words.
1495  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1496  */
1497 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1498         CharClassify::cc ccStart = CharClassify::ccWord;
1499         if (delta < 0) {
1500                 if (!onlyWordCharacters)
1501                         ccStart = WordCharClass(cb.CharAt(pos-1));
1502                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1503                         pos--;
1504         } else {
1505                 if (!onlyWordCharacters && pos < Length())
1506                         ccStart = WordCharClass(cb.CharAt(pos));
1507                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1508                         pos++;
1509         }
1510         return MovePositionOutsideChar(pos, delta, true);
1511 }
1512
1513 /**
1514  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1515  * (delta < 0).
1516  * This is looking for a transition between character classes although there is also some
1517  * additional movement to transit white space.
1518  * Used by cursor movement by word commands.
1519  */
1520 int Document::NextWordStart(int pos, int delta) {
1521         if (delta < 0) {
1522                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1523                         pos--;
1524                 if (pos > 0) {
1525                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1526                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1527                                 pos--;
1528                         }
1529                 }
1530         } else {
1531                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1532                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1533                         pos++;
1534                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1535                         pos++;
1536         }
1537         return pos;
1538 }
1539
1540 /**
1541  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1542  * (delta < 0).
1543  * This is looking for a transition between character classes although there is also some
1544  * additional movement to transit white space.
1545  * Used by cursor movement by word commands.
1546  */
1547 int Document::NextWordEnd(int pos, int delta) {
1548         if (delta < 0) {
1549                 if (pos > 0) {
1550                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1551                         if (ccStart != CharClassify::ccSpace) {
1552                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1553                                         pos--;
1554                                 }
1555                         }
1556                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1557                                 pos--;
1558                         }
1559                 }
1560         } else {
1561                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1562                         pos++;
1563                 }
1564                 if (pos < Length()) {
1565                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1566                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1567                                 pos++;
1568                         }
1569                 }
1570         }
1571         return pos;
1572 }
1573
1574 /**
1575  * Check that the character at the given position is a word or punctuation character and that
1576  * the previous character is of a different character class.
1577  */
1578 bool Document::IsWordStartAt(int pos) const {
1579         if (pos > 0) {
1580                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1581                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1582                         (ccPos != WordCharClass(CharAt(pos - 1)));
1583         }
1584         return true;
1585 }
1586
1587 /**
1588  * Check that the character at the given position is a word or punctuation character and that
1589  * the next character is of a different character class.
1590  */
1591 bool Document::IsWordEndAt(int pos) const {
1592         if (pos < Length()) {
1593                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1594                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1595                         (ccPrev != WordCharClass(CharAt(pos)));
1596         }
1597         return true;
1598 }
1599
1600 /**
1601  * Check that the given range is has transitions between character classes at both
1602  * ends and where the characters on the inside are word or punctuation characters.
1603  */
1604 bool Document::IsWordAt(int start, int end) const {
1605         return (start < end) && IsWordStartAt(start) && IsWordEndAt(end);
1606 }
1607
1608 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) const {
1609         return (!word && !wordStart) ||
1610                         (word && IsWordAt(pos, pos + length)) ||
1611                         (wordStart && IsWordStartAt(pos));
1612 }
1613
1614 bool Document::HasCaseFolder(void) const {
1615         return pcf != 0;
1616 }
1617
1618 void Document::SetCaseFolder(CaseFolder *pcf_) {
1619         delete pcf;
1620         pcf = pcf_;
1621 }
1622
1623 Document::CharacterExtracted Document::ExtractCharacter(int position) const {
1624         const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(position));
1625         if (UTF8IsAscii(leadByte)) {
1626                 // Common case: ASCII character
1627                 return CharacterExtracted(leadByte, 1);
1628         }
1629         const int widthCharBytes = UTF8BytesOfLead[leadByte];
1630         unsigned char charBytes[UTF8MaxBytes] = { leadByte, 0, 0, 0 };
1631         for (int b=1; b<widthCharBytes; b++)
1632                 charBytes[b] = static_cast<unsigned char>(cb.CharAt(position + b));
1633         int utf8status = UTF8Classify(charBytes, widthCharBytes);
1634         if (utf8status & UTF8MaskInvalid) {
1635                 // Treat as invalid and use up just one byte
1636                 return CharacterExtracted(unicodeReplacementChar, 1);
1637         } else {
1638                 return CharacterExtracted(UnicodeFromUTF8(charBytes), utf8status & UTF8MaskWidth);
1639         }
1640 }
1641
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos  Position where the search starts.
 * @param maxPos  Position where the search ends; smaller than minPos for a backward search.
 * @param search  Text (or regular expression when SCFIND_REGEXP is set) to look for.
 * @param flags   Combination of SCFIND_MATCHCASE, SCFIND_WHOLEWORD, SCFIND_WORDSTART
 *                and SCFIND_REGEXP.
 * @param length  On entry the length in bytes of search. The case-insensitive UTF-8 and
 *                DBCS paths overwrite it with the byte length of the matched document text.
 * @return Position of the match, minPos when *length is not positive, or -1 when
 *         nothing is found.
 *
 * NOTE(review): the case-insensitive paths dereference pcf without checking it;
 * presumably callers make sure a case folder has been set (see HasCaseFolder /
 * SetCaseFolder) - confirm against the callers.
 */
long Document::FindText(int minPos, int maxPos, const char *search,
                        int flags, int *length) {
        // Nothing to search for: report the start of the range.
        if (*length <= 0)
                return minPos;
        const bool caseSensitive = (flags & SCFIND_MATCHCASE) != 0;
        const bool word = (flags & SCFIND_WHOLEWORD) != 0;
        const bool wordStart = (flags & SCFIND_WORDSTART) != 0;
        const bool regExp = (flags & SCFIND_REGEXP) != 0;
        if (regExp) {
                // The regular expression engine is created on first use and then kept.
                if (!regex)
                        regex = CreateRegexSearch(&charClass);
                return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
        } else {

                const bool forward = minPos <= maxPos;
                const int increment = forward ? 1 : -1;

                // Range endpoints should not be inside DBCS characters, but just in case, move them.
                const int startPos = MovePositionOutsideChar(minPos, increment, false);
                const int endPos = MovePositionOutsideChar(maxPos, increment, false);

                // Compute actual search ranges needed
                const int lengthFind = *length;

                //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
                // Upper bound of the searched range whatever the direction; a match
                // may not extend beyond it.
                const int limitPos = Platform::Maximum(startPos, endPos);
                int pos = startPos;
                if (!forward) {
                        // Back all of a character
                        pos = NextPosition(pos, increment);
                }
                if (caseSensitive) {
                        // Case sensitive: compare the document bytes directly with the search bytes.
                        // Searching forwards, candidates stop where lengthFind bytes no longer fit before endPos.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        const char charStartSearch =  search[0];
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                // Only examine the remaining bytes when the first byte matches.
                                if (CharAt(pos) == charStartSearch) {
                                        bool found = (pos + lengthFind) <= limitPos;
                                        for (int indexSearch = 1; (indexSearch < lengthFind) && found; indexSearch++) {
                                                found = CharAt(pos + indexSearch) == search[indexSearch];
                                        }
                                        if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                                return pos;
                                        }
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else if (SC_CP_UTF8 == dbcsCodePage) {
                        // Case insensitive UTF-8: fold the search text once, then fold the document
                        // one character at a time and compare the folded bytes.
                        const size_t maxFoldingExpansion = 4;
                        // Buffer sized for every search byte expanding to UTF8MaxBytes * maxFoldingExpansion bytes.
                        std::vector<char> searchThing(lengthFind * UTF8MaxBytes * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        char bytes[UTF8MaxBytes + 1];
                        char folded[UTF8MaxBytes * maxFoldingExpansion + 1];
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int widthFirstCharacter = 0;
                                int posIndexDocument = pos;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                // Compare character by character starting at pos until a mismatch,
                                // the range limit, or the end of the folded search text.
                                for (;;) {
                                        const unsigned char leadByte = static_cast<unsigned char>(cb.CharAt(posIndexDocument));
                                        bytes[0] = leadByte;
                                        int widthChar = 1;
                                        if (!UTF8IsAscii(leadByte)) {
                                                const int widthCharBytes = UTF8BytesOfLead[leadByte];
                                                for (int b=1; b<widthCharBytes; b++) {
                                                        bytes[b] = cb.CharAt(posIndexDocument+b);
                                                }
                                                widthChar = UTF8Classify(reinterpret_cast<const unsigned char *>(bytes), widthCharBytes) & UTF8MaskWidth;
                                        }
                                        // Remember the width of the character at pos so a forward search
                                        // can step over it afterwards.
                                        if (!widthFirstCharacter)
                                                widthFirstCharacter = widthChar;
                                        if ((posIndexDocument + widthChar) > limitPos)
                                                break;
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        if (!characterMatches)
                                                break;
                                        posIndexDocument += widthChar;
                                        indexSearch += lenFlat;
                                        if (indexSearch >= lenSearch)
                                                break;
                                }
                                // A match requires the folded search text to have been consumed exactly.
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, posIndexDocument - pos)) {
                                                // Report the length of the document text that matched.
                                                *length = posIndexDocument - pos;
                                                return pos;
                                        }
                                }
                                if (forward) {
                                        pos += widthFirstCharacter;
                                } else {
                                        if (!NextCharacter(pos, increment))
                                                break;
                                }
                        }
                } else if (dbcsCodePage) {
                        // Case insensitive DBCS: same approach as UTF-8 with characters of one or two bytes.
                        const size_t maxBytesCharacter = 2;
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int indexDocument = 0;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                while (characterMatches &&
                                        ((pos + indexDocument) < limitPos) &&
                                        (indexSearch < lenSearch)) {
                                        char bytes[maxBytesCharacter + 1];
                                        bytes[0] = cb.CharAt(pos + indexDocument);
                                        const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
                                        if (widthChar == 2)
                                                bytes[1] = cb.CharAt(pos + indexDocument + 1);
                                        if ((pos + indexDocument + widthChar) > limitPos)
                                                break;
                                        char folded[maxBytesCharacter * maxFoldingExpansion + 1];
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        indexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                // Report the length of the document text that matched.
                                                *length = indexDocument;
                                                return pos;
                                        }
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else {
                        // Case insensitive single byte encoding: fold each document byte and
                        // compare it with the folded search text.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        std::vector<char> searchThing(lengthFind + 1);
                        pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                bool found = (pos + lengthFind) <= limitPos;
                                for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
                                        char ch = CharAt(pos + indexSearch);
                                        char folded[2];
                                        pcf->Fold(folded, sizeof(folded), &ch, 1);
                                        found = folded[0] == searchThing[indexSearch];
                                }
                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                        return pos;
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                }
        }
        //Platform::DebugPrintf("Not found\n");
        return -1;
}
1805
1806 const char *Document::SubstituteByPosition(const char *text, int *length) {
1807         if (regex)
1808                 return regex->SubstituteByPosition(this, text, length);
1809         else
1810                 return 0;
1811 }
1812
1813 int Document::LinesTotal() const {
1814         return cb.Lines();
1815 }
1816
1817 void Document::SetDefaultCharClasses(bool includeWordClass) {
1818     charClass.SetDefaultCharClasses(includeWordClass);
1819 }
1820
1821 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1822     charClass.SetCharClasses(chars, newCharClass);
1823 }
1824
1825 int Document::GetCharsOfClass(CharClassify::cc characterClass, unsigned char *buffer) {
1826     return charClass.GetCharsOfClass(characterClass, buffer);
1827 }
1828
1829 void SCI_METHOD Document::StartStyling(Sci_Position position, char) {
1830         endStyled = position;
1831 }
1832
1833 bool SCI_METHOD Document::SetStyleFor(Sci_Position length, char style) {
1834         if (enteredStyling != 0) {
1835                 return false;
1836         } else {
1837                 enteredStyling++;
1838                 int prevEndStyled = endStyled;
1839                 if (cb.SetStyleFor(endStyled, length, style)) {
1840                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1841                                            prevEndStyled, length);
1842                         NotifyModified(mh);
1843                 }
1844                 endStyled += length;
1845                 enteredStyling--;
1846                 return true;
1847         }
1848 }
1849
1850 bool SCI_METHOD Document::SetStyles(Sci_Position length, const char *styles) {
1851         if (enteredStyling != 0) {
1852                 return false;
1853         } else {
1854                 enteredStyling++;
1855                 bool didChange = false;
1856                 int startMod = 0;
1857                 int endMod = 0;
1858                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1859                         PLATFORM_ASSERT(endStyled < Length());
1860                         if (cb.SetStyleAt(endStyled, styles[iPos])) {
1861                                 if (!didChange) {
1862                                         startMod = endStyled;
1863                                 }
1864                                 didChange = true;
1865                                 endMod = endStyled;
1866                         }
1867                 }
1868                 if (didChange) {
1869                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1870                                            startMod, endMod - startMod + 1);
1871                         NotifyModified(mh);
1872                 }
1873                 enteredStyling--;
1874                 return true;
1875         }
1876 }
1877
1878 void Document::EnsureStyledTo(int pos) {
1879         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1880                 IncrementStyleClock();
1881                 if (pli && !pli->UseContainerLexing()) {
1882                         int lineEndStyled = LineFromPosition(GetEndStyled());
1883                         int endStyledTo = LineStart(lineEndStyled);
1884                         pli->Colourise(endStyledTo, pos);
1885                 } else {
1886                         // Ask the watchers to style, and stop as soon as one responds.
1887                         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin();
1888                                 (pos > GetEndStyled()) && (it != watchers.end()); ++it) {
1889                                 it->watcher->NotifyStyleNeeded(this, it->userData, pos);
1890                         }
1891                 }
1892         }
1893 }
1894
1895 void Document::LexerChanged() {
1896         // Tell the watchers the lexer has changed.
1897         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
1898                 it->watcher->NotifyLexerChanged(this, it->userData);
1899         }
1900 }
1901
1902 int SCI_METHOD Document::SetLineState(Sci_Position line, int state) {
1903         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1904         if (state != statePrevious) {
1905                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1906                 NotifyModified(mh);
1907         }
1908         return statePrevious;
1909 }
1910
1911 int SCI_METHOD Document::GetLineState(Sci_Position line) const {
1912         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1913 }
1914
1915 int Document::GetMaxLineState() {
1916         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1917 }
1918
1919 void SCI_METHOD Document::ChangeLexerState(Sci_Position start, Sci_Position end) {
1920         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1921         NotifyModified(mh);
1922 }
1923
1924 StyledText Document::MarginStyledText(int line) const {
1925         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1926         return StyledText(pla->Length(line), pla->Text(line),
1927                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1928 }
1929
1930 void Document::MarginSetText(int line, const char *text) {
1931         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1932         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1933         NotifyModified(mh);
1934 }
1935
1936 void Document::MarginSetStyle(int line, int style) {
1937         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1938         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1939 }
1940
1941 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1942         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1943         NotifyModified(DocModification(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line));
1944 }
1945
1946 void Document::MarginClearAll() {
1947         int maxEditorLine = LinesTotal();
1948         for (int l=0; l<maxEditorLine; l++)
1949                 MarginSetText(l, 0);
1950         // Free remaining data
1951         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1952 }
1953
1954 StyledText Document::AnnotationStyledText(int line) const {
1955         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1956         return StyledText(pla->Length(line), pla->Text(line),
1957                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1958 }
1959
1960 void Document::AnnotationSetText(int line, const char *text) {
1961         if (line >= 0 && line < LinesTotal()) {
1962                 const int linesBefore = AnnotationLines(line);
1963                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1964                 const int linesAfter = AnnotationLines(line);
1965                 DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1966                 mh.annotationLinesAdded = linesAfter - linesBefore;
1967                 NotifyModified(mh);
1968         }
1969 }
1970
1971 void Document::AnnotationSetStyle(int line, int style) {
1972         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1973         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1974         NotifyModified(mh);
1975 }
1976
1977 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1978         if (line >= 0 && line < LinesTotal()) {
1979                 static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1980         }
1981 }
1982
1983 int Document::AnnotationLines(int line) const {
1984         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1985 }
1986
1987 void Document::AnnotationClearAll() {
1988         int maxEditorLine = LinesTotal();
1989         for (int l=0; l<maxEditorLine; l++)
1990                 AnnotationSetText(l, 0);
1991         // Free remaining data
1992         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1993 }
1994
1995 void Document::IncrementStyleClock() {
1996         styleClock = (styleClock + 1) % 0x100000;
1997 }
1998
1999 void SCI_METHOD Document::DecorationFillRange(Sci_Position position, int value, Sci_Position fillLength) {
2000         if (decorations.FillRange(position, value, fillLength)) {
2001                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
2002                                                         position, fillLength);
2003                 NotifyModified(mh);
2004         }
2005 }
2006
2007 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
2008         WatcherWithUserData wwud(watcher, userData);
2009         std::vector<WatcherWithUserData>::iterator it =
2010                 std::find(watchers.begin(), watchers.end(), wwud);
2011         if (it != watchers.end())
2012                 return false;
2013         watchers.push_back(wwud);
2014         return true;
2015 }
2016
2017 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
2018         std::vector<WatcherWithUserData>::iterator it =
2019                 std::find(watchers.begin(), watchers.end(), WatcherWithUserData(watcher, userData));
2020         if (it != watchers.end()) {
2021                 watchers.erase(it);
2022                 return true;
2023         }
2024         return false;
2025 }
2026
2027 void Document::NotifyModifyAttempt() {
2028         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2029                 it->watcher->NotifyModifyAttempt(this, it->userData);
2030         }
2031 }
2032
2033 void Document::NotifySavePoint(bool atSavePoint) {
2034         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2035                 it->watcher->NotifySavePoint(this, it->userData, atSavePoint);
2036         }
2037 }
2038
2039 void Document::NotifyModified(DocModification mh) {
2040         if (mh.modificationType & SC_MOD_INSERTTEXT) {
2041                 decorations.InsertSpace(mh.position, mh.length);
2042         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
2043                 decorations.DeleteRange(mh.position, mh.length);
2044         }
2045         for (std::vector<WatcherWithUserData>::iterator it = watchers.begin(); it != watchers.end(); ++it) {
2046                 it->watcher->NotifyModified(this, mh, it->userData);
2047         }
2048 }
2049
2050 bool Document::IsWordPartSeparator(char ch) const {
2051         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
2052 }
2053
/**
 * Find the start of the word part that ends before pos.
 * Word part separators immediately before pos are skipped first. The run of
 * characters of the same kind (lower case, upper case, digit, punctuation,
 * space or non-ASCII) that precedes them is then walked backwards.
 * @param pos position to move left from; 0 is returned unchanged.
 * @return the position of the first character of the word part.
 */
int Document::WordPartLeft(int pos) {
        if (pos > 0) {
                // Look at the character just before the starting position.
                --pos;
                char startChar = cb.CharAt(pos);
                if (IsWordPartSeparator(startChar)) {
                        // Step back over a run of separators.
                        while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
                                --pos;
                        }
                }
                if (pos > 0) {
                        // startChar decides which kind of run is being walked; pos then
                        // moves to the character before it.
                        startChar = cb.CharAt(pos);
                        --pos;
                        if (IsLowerCase(startChar)) {
                                while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
                                        --pos;
                                // An upper case letter in front of the lower case run is kept
                                // as the first character of the part; any other kind of
                                // character is excluded by stepping forward again.
                                if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsUpperCase(startChar)) {
                                while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
                                        --pos;
                                // Stopped on a character of another kind: exclude it.
                                if (!IsUpperCase(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsADigit(startChar)) {
                                while (pos > 0 && IsADigit(cb.CharAt(pos)))
                                        --pos;
                                if (!IsADigit(cb.CharAt(pos)))
                                        ++pos;
                        } else if (IsPunctuation(startChar)) {
                                while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
                                        --pos;
                                if (!IsPunctuation(cb.CharAt(pos)))
                                        ++pos;
                        } else if (isspacechar(startChar)) {
                                while (pos > 0 && isspacechar(cb.CharAt(pos)))
                                        --pos;
                                if (!isspacechar(cb.CharAt(pos)))
                                        ++pos;
                        } else if (!IsASCII(startChar)) {
                                while (pos > 0 && !IsASCII(cb.CharAt(pos)))
                                        --pos;
                                if (IsASCII(cb.CharAt(pos)))
                                        ++pos;
                        } else {
                                // No recognised kind: undo the step so the part is the
                                // single character startChar.
                                ++pos;
                        }
                }
        }
        return pos;
}
2103
/**
 * Find the end of the word part that starts at pos.
 * Word part separators at pos are skipped first, then the run of characters
 * of the same kind as the first remaining character is walked forwards.
 * @param pos position to move right from.
 * @return the position just after the word part, never beyond Length()
 * when walking a run.
 */
int Document::WordPartRight(int pos) {
        char startChar = cb.CharAt(pos);
        int length = Length();
        if (IsWordPartSeparator(startChar)) {
                // Step forward over a run of separators and classify what follows.
                while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
                        ++pos;
                startChar = cb.CharAt(pos);
        }
        if (!IsASCII(startChar)) {
                while (pos < length && !IsASCII(cb.CharAt(pos)))
                        ++pos;
        } else if (IsLowerCase(startChar)) {
                while (pos < length && IsLowerCase(cb.CharAt(pos)))
                        ++pos;
        } else if (IsUpperCase(startChar)) {
                if (IsLowerCase(cb.CharAt(pos + 1))) {
                        // A capital followed by lower case: the part is the capital
                        // plus the lower case run.
                        ++pos;
                        while (pos < length && IsLowerCase(cb.CharAt(pos)))
                                ++pos;
                } else {
                        // A run of capitals.
                        while (pos < length && IsUpperCase(cb.CharAt(pos)))
                                ++pos;
                }
                // When a run of capitals is followed by lower case, the last capital
                // is left to begin the next part.
                if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
                        --pos;
        } else if (IsADigit(startChar)) {
                while (pos < length && IsADigit(cb.CharAt(pos)))
                        ++pos;
        } else if (IsPunctuation(startChar)) {
                while (pos < length && IsPunctuation(cb.CharAt(pos)))
                        ++pos;
        } else if (isspacechar(startChar)) {
                while (pos < length && isspacechar(cb.CharAt(pos)))
                        ++pos;
        } else {
                // No recognised kind: the part is the single character.
                ++pos;
        }
        return pos;
}
2143
/**
 * Report whether a character is a line feed or a carriage return.
 */
bool IsLineEndChar(char c) {
        switch (c) {
        case '\n':
        case '\r':
                return true;
        default:
                return false;
        }
}
2147
2148 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
2149         int sStart = cb.StyleAt(pos);
2150         if (delta < 0) {
2151                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2152                         pos--;
2153                 pos++;
2154         } else {
2155                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
2156                         pos++;
2157         }
2158         return pos;
2159 }
2160
/**
 * Return the partner of a brace character: round, square, curly and angle
 * brackets map to each other. Any other character yields '\0'.
 */
static char BraceOpposite(char ch) {
        // Partners are stored side by side, so flipping the lowest bit of the
        // index of a brace gives the index of its partner.
        static const char bracePairs[] = "()[]{}<>";
        for (int i = 0; bracePairs[i] != '\0'; i++) {
                if (bracePairs[i] == ch)
                        return bracePairs[i ^ 1];
        }
        return '\0';
}
2183
2184 // TODO: should be able to extend styled region to find matching brace
2185 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2186         char chBrace = CharAt(position);
2187         char chSeek = BraceOpposite(chBrace);
2188         if (chSeek == '\0')
2189                 return - 1;
2190         char styBrace = static_cast<char>(StyleAt(position));
2191         int direction = -1;
2192         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2193                 direction = 1;
2194         int depth = 1;
2195         position = NextPosition(position, direction);
2196         while ((position >= 0) && (position < Length())) {
2197                 char chAtPos = CharAt(position);
2198                 char styAtPos = static_cast<char>(StyleAt(position));
2199                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2200                         if (chAtPos == chBrace)
2201                                 depth++;
2202                         if (chAtPos == chSeek)
2203                                 depth--;
2204                         if (depth == 0)
2205                                 return position;
2206                 }
2207                 int positionBeforeMove = position;
2208                 position = NextPosition(position, direction);
2209                 if (position == positionBeforeMove)
2210                         break;
2211         }
2212         return - 1;
2213 }
2214
/**
 * Implementation of RegexSearchBase for the default built-in regular expression engine
 */
class BuiltinRegex : public RegexSearchBase {
public:
        /// Construct the RESearch engine with the character classification table to use.
        explicit BuiltinRegex(CharClassify *charClassTable) : search(charClassTable) {}

        virtual ~BuiltinRegex() {
        }

        /// Search doc between minPos and maxPos for the pattern s.
        /// The length of the pattern is read from *length.
        virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
                        bool caseSensitive, bool word, bool wordStart, int flags,
                        int *length);

        virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);

private:
        /// Regular expression engine, which also holds the positions and text of matches.
        RESearch search;
        std::string substituted;
};
2235
2236 namespace {
2237
2238 /**
2239 * RESearchRange keeps track of search range.
2240 */
2241 class RESearchRange {
2242 public:
2243         const Document *doc;
2244         int increment;
2245         int startPos;
2246         int endPos;
2247         int lineRangeStart;
2248         int lineRangeEnd;
2249         int lineRangeBreak;
2250         RESearchRange(const Document *doc_, int minPos, int maxPos) : doc(doc_) {
2251                 increment = (minPos <= maxPos) ? 1 : -1;
2252
2253                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
2254                 startPos = doc->MovePositionOutsideChar(minPos, 1, false);
2255                 endPos = doc->MovePositionOutsideChar(maxPos, 1, false);
2256
2257                 lineRangeStart = doc->LineFromPosition(startPos);
2258                 lineRangeEnd = doc->LineFromPosition(endPos);
2259                 if ((increment == 1) &&
2260                         (startPos >= doc->LineEnd(lineRangeStart)) &&
2261                         (lineRangeStart < lineRangeEnd)) {
2262                         // the start position is at end of line or between line end characters.
2263                         lineRangeStart++;
2264                         startPos = doc->LineStart(lineRangeStart);
2265                 } else if ((increment == -1) &&
2266                         (startPos <= doc->LineStart(lineRangeStart)) &&
2267                         (lineRangeStart > lineRangeEnd)) {
2268                         // the start position is at beginning of line.
2269                         lineRangeStart--;
2270                         startPos = doc->LineEnd(lineRangeStart);
2271                 }
2272                 lineRangeBreak = lineRangeEnd + increment;
2273         }
2274         Range LineRange(int line) const {
2275                 Range range(doc->LineStart(line), doc->LineEnd(line));
2276                 if (increment == 1) {
2277                         if (line == lineRangeStart)
2278                                 range.start = startPos;
2279                         if (line == lineRangeEnd)
2280                                 range.end = endPos;
2281                 } else {
2282                         if (line == lineRangeEnd)
2283                                 range.start = endPos;
2284                         if (line == lineRangeStart)
2285                                 range.end = startPos;
2286                 }
2287                 return range;
2288         }
2289 };
2290
2291 // Define a way for the Regular Expression code to access the document
2292 class DocumentIndexer : public CharacterIndexer {
2293         Document *pdoc;
2294         int end;
2295 public:
2296         DocumentIndexer(Document *pdoc_, int end_) :
2297                 pdoc(pdoc_), end(end_) {
2298         }
2299
2300         virtual ~DocumentIndexer() {
2301         }
2302
2303         virtual char CharAt(int index) {
2304                 if (index < 0 || index >= end)
2305                         return 0;
2306                 else
2307                         return pdoc->CharAt(index);
2308         }
2309 };
2310
2311 #ifdef CXX11_REGEX
2312
2313 class ByteIterator : public std::iterator<std::bidirectional_iterator_tag, char> {
2314 public:
2315         const Document *doc;
2316         Position position;
2317         ByteIterator(const Document *doc_ = 0, Position position_ = 0) : doc(doc_), position(position_) {
2318         }
2319         ByteIterator(const ByteIterator &other) {
2320                 doc = other.doc;
2321                 position = other.position;
2322         }
2323         ByteIterator &operator=(const ByteIterator &other) {
2324                 if (this != &other) {
2325                         doc = other.doc;
2326                         position = other.position;
2327                 }
2328                 return *this;
2329         }
2330         char operator*() const {
2331                 return doc->CharAt(position);
2332         }
2333         ByteIterator &operator++() {
2334                 position++;
2335                 return *this;
2336         }
2337         ByteIterator operator++(int) {
2338                 ByteIterator retVal(*this);
2339                 position++;
2340                 return retVal;
2341         }
2342         ByteIterator &operator--() {
2343                 position--;
2344                 return *this;
2345         }
2346         bool operator==(const ByteIterator &other) const {
2347                 return doc == other.doc && position == other.position;
2348         }
2349         bool operator!=(const ByteIterator &other) const {
2350                 return doc != other.doc || position != other.position;
2351         }
2352         int Pos() const {
2353                 return position;
2354         }
2355         int PosRoundUp() const {
2356                 return position;
2357         }
2358 };
2359
2360 // On Windows, wchar_t is 16 bits wide and on Unix it is 32 bits wide.
2361 // Would be better to use sizeof(wchar_t) or similar to differentiate
2362 // but easier for now to hard-code platforms.
2363 // C++11 has char16_t and char32_t but neither Clang nor Visual C++
2364 // appear to allow specializing basic_regex over these.
2365
2366 #ifdef _WIN32
2367 #define WCHAR_T_IS_16 1
2368 #else
2369 #define WCHAR_T_IS_16 0
2370 #endif
2371
2372 #if WCHAR_T_IS_16
2373
// On Windows, report non-BMP characters as 2 separate surrogates as that
// matches wregex since it is based on wchar_t.
// The iterator decodes one document character at a time into 'buffered' and
// steps through the 1 or 2 wchar_t units it produced before moving on.
class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
        // These 3 fields determine the iterator position and are used for comparisons
        const Document *doc;
        Position position;
        size_t characterIndex;
        // Remaining fields are derived from the determining fields so are excluded in comparisons
        unsigned int lenBytes;
        size_t lenCharacters;
        wchar_t buffered[2];
public:
        // The character at position_ is decoded immediately when a document is given.
        UTF8Iterator(const Document *doc_ = 0, Position position_ = 0) :
                doc(doc_), position(position_), characterIndex(0), lenBytes(0), lenCharacters(0) {
                buffered[0] = 0;
                buffered[1] = 0;
                if (doc) {
                        ReadCharacter();
                }
        }
        UTF8Iterator(const UTF8Iterator &other) {
                doc = other.doc;
                position = other.position;
                characterIndex = other.characterIndex;
                lenBytes = other.lenBytes;
                lenCharacters = other.lenCharacters;
                buffered[0] = other.buffered[0];
                buffered[1] = other.buffered[1];
        }
        UTF8Iterator &operator=(const UTF8Iterator &other) {
                if (this != &other) {
                        doc = other.doc;
                        position = other.position;
                        characterIndex = other.characterIndex;
                        lenBytes = other.lenBytes;
                        lenCharacters = other.lenCharacters;
                        buffered[0] = other.buffered[0];
                        buffered[1] = other.buffered[1];
                }
                return *this;
        }
        // Return the current wchar_t unit of the decoded character.
        wchar_t operator*() const {
                assert(lenCharacters != 0);
                return buffered[characterIndex];
        }
        // Move to the next buffered unit, or decode the next document character
        // once all units of the current one have been visited.
        UTF8Iterator &operator++() {
                if ((characterIndex + 1) < (lenCharacters)) {
                        characterIndex++;
                } else {
                        position += lenBytes;
                        ReadCharacter();
                        characterIndex = 0;
                }
                return *this;
        }
        // Post-increment: same movement as the prefix form, returning the old value.
        UTF8Iterator operator++(int) {
                UTF8Iterator retVal(*this);
                if ((characterIndex + 1) < (lenCharacters)) {
                        characterIndex++;
                } else {
                        position += lenBytes;
                        ReadCharacter();
                        characterIndex = 0;
                }
                return retVal;
        }
        // Move to the previous buffered unit, or decode the previous document
        // character and select its last unit.
        UTF8Iterator &operator--() {
                if (characterIndex) {
                        characterIndex--;
                } else {
                        position = doc->NextPosition(position, -1);
                        ReadCharacter();
                        characterIndex = lenCharacters - 1;
                }
                return *this;
        }
        bool operator==(const UTF8Iterator &other) const {
                // Only test the determining fields, not the character widths and values derived from this
                return doc == other.doc &&
                        position == other.position &&
                        characterIndex == other.characterIndex;
        }
        bool operator!=(const UTF8Iterator &other) const {
                // Only test the determining fields, not the character widths and values derived from this
                return doc != other.doc ||
                        position != other.position ||
                        characterIndex != other.characterIndex;
        }
        // Byte position of the start of the current document character.
        int Pos() const {
                return position;
        }
        // Byte position that never falls inside a character: when the iterator is
        // on the second unit of a character, the end of that character is returned.
        int PosRoundUp() const {
                if (characterIndex)
                        return position + lenBytes;     // Force to end of character
                else
                        return position;
        }
private:
        // Decode the document character at 'position' into 'buffered', recording
        // its width in bytes and the number of wchar_t units it occupies.
        void ReadCharacter() {
                Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
                lenBytes = charExtracted.widthBytes;
                if (charExtracted.character == unicodeReplacementChar) {
                        // Stored directly as a single unit.
                        // NOTE(review): presumably this is what ExtractCharacter yields for
                        // invalid UTF-8 - confirm against Document::ExtractCharacter.
                        lenCharacters = 1;
                        buffered[0] = static_cast<wchar_t>(charExtracted.character);
                } else {
                        lenCharacters = UTF16FromUTF32Character(charExtracted.character, buffered);
                }
        }
};
2483
2484 #else
2485
2486 // On Unix, report non-BMP characters as single characters
2487
2488 class UTF8Iterator : public std::iterator<std::bidirectional_iterator_tag, wchar_t> {
2489         const Document *doc;
2490         Position position;
2491 public:
2492         UTF8Iterator(const Document *doc_=0, Position position_=0) : doc(doc_), position(position_) {
2493         }
2494         UTF8Iterator(const UTF8Iterator &other) {
2495                 doc = other.doc;
2496                 position = other.position;
2497         }
2498         UTF8Iterator &operator=(const UTF8Iterator &other) {
2499                 if (this != &other) {
2500                         doc = other.doc;
2501                         position = other.position;
2502                 }
2503                 return *this;
2504         }
2505         wchar_t operator*() const {
2506                 Document::CharacterExtracted charExtracted = doc->ExtractCharacter(position);
2507                 return charExtracted.character;
2508         }
2509         UTF8Iterator &operator++() {
2510                 position = doc->NextPosition(position, 1);
2511                 return *this;
2512         }
2513         UTF8Iterator operator++(int) {
2514                 UTF8Iterator retVal(*this);
2515                 position = doc->NextPosition(position, 1);
2516                 return retVal;
2517         }
2518         UTF8Iterator &operator--() {
2519                 position = doc->NextPosition(position, -1);
2520                 return *this;
2521         }
2522         bool operator==(const UTF8Iterator &other) const {
2523                 return doc == other.doc && position == other.position;
2524         }
2525         bool operator!=(const UTF8Iterator &other) const {
2526                 return doc != other.doc || position != other.position;
2527         }
2528         int Pos() const {
2529                 return position;
2530         }
2531         int PosRoundUp() const {
2532                 return position;
2533         }
2534 };
2535
2536 #endif
2537
2538 std::regex_constants::match_flag_type MatchFlags(const Document *doc, int startPos, int endPos) {
2539         std::regex_constants::match_flag_type flagsMatch = std::regex_constants::match_default;
2540         if (!doc->IsLineStartPosition(startPos))
2541                 flagsMatch |= std::regex_constants::match_not_bol;
2542         if (!doc->IsLineEndPosition(endPos))
2543                 flagsMatch |= std::regex_constants::match_not_eol;
2544         return flagsMatch;
2545 }
2546
2547 template<typename Iterator, typename Regex>
2548 bool MatchOnLines(const Document *doc, const Regex &regexp, const RESearchRange &resr, RESearch &search) {
2549         bool matched = false;
2550         std::match_results<Iterator> match;
2551
2552         // MSVC and libc++ have problems with ^ and $ matching line ends inside a range
2553         // If they didn't then the line by line iteration could be removed for the forwards
2554         // case and replaced with the following 4 lines:
2555         //      Iterator uiStart(doc, startPos);
2556         //      Iterator uiEnd(doc, endPos);
2557         //      flagsMatch = MatchFlags(doc, startPos, endPos);
2558         //      matched = std::regex_search(uiStart, uiEnd, match, regexp, flagsMatch);
2559
2560         // Line by line.
2561         for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2562                 const Range lineRange = resr.LineRange(line);
2563                 Iterator itStart(doc, lineRange.start);
2564                 Iterator itEnd(doc, lineRange.end);
2565                 std::regex_constants::match_flag_type flagsMatch = MatchFlags(doc, lineRange.start, lineRange.end);
2566                 matched = std::regex_search(itStart, itEnd, match, regexp, flagsMatch);
2567                 // Check for the last match on this line.
2568                 if (matched) {
2569                         if (resr.increment == -1) {
2570                                 while (matched) {
2571                                         Iterator itNext(doc, match[0].second.PosRoundUp());
2572                                         flagsMatch = MatchFlags(doc, itNext.Pos(), lineRange.end);
2573                                         std::match_results<Iterator> matchNext;
2574                                         matched = std::regex_search(itNext, itEnd, matchNext, regexp, flagsMatch);
2575                                         if (matched) {
2576                                                 if (match[0].first == match[0].second) {
2577                                                         // Empty match means failure so exit
2578                                                         return false;
2579                                                 }
2580                                                 match = matchNext;
2581                                         }
2582                                 }
2583                                 matched = true;
2584                         }
2585                         break;
2586                 }
2587         }
2588         if (matched) {
2589                 for (size_t co = 0; co < match.size(); co++) {
2590                         search.bopat[co] = match[co].first.Pos();
2591                         search.eopat[co] = match[co].second.PosRoundUp();
2592                         size_t lenMatch = search.eopat[co] - search.bopat[co];
2593                         search.pat[co].resize(lenMatch);
2594                         for (size_t iPos = 0; iPos < lenMatch; iPos++) {
2595                                 search.pat[co][iPos] = doc->CharAt(iPos + search.bopat[co]);
2596                         }
2597                 }
2598         }
2599         return matched;
2600 }
2601
/**
 * Search a document with the C++11 <regex> library using ECMAScript syntax.
 * For UTF-8 documents the pattern is converted to wide characters and matched
 * with std::wregex over UTF8Iterator; otherwise std::regex is matched over
 * the bytes of the document with ByteIterator.
 * @param doc document to search.
 * @param minPos start of the search; greater than maxPos to search backwards.
 * @param maxPos end of the search.
 * @param s NUL terminated pattern.
 * @param caseSensitive when false the pattern is compiled with std::regex::icase.
 * @param length receives the length of the match; only written when a match is found.
 * @param search cleared and then filled with the match positions and text.
 * @return the start position of the match, or -1 when there is no match or
 * an exception other than std::regex_error occurred.
 * @throws RegexError when the pattern can not be compiled.
 */
long Cxx11RegexFindText(Document *doc, int minPos, int maxPos, const char *s,
        bool caseSensitive, int *length, RESearch &search) {
        const RESearchRange resr(doc, minPos, maxPos);
        try {
                //ElapsedTime et;
                std::regex::flag_type flagsRe = std::regex::ECMAScript;
                // Flags that appear to have no effect:
                // | std::regex::collate | std::regex::extended;
                if (!caseSensitive)
                        flagsRe = flagsRe | std::regex::icase;

                // Clear the RESearch so can fill in matches
                search.Clear();

                bool matched = false;
                if (SC_CP_UTF8 == doc->dbcsCodePage) {
                        // The wide pattern never has more elements than the UTF-8 pattern
                        // has bytes, so lenS + 1 leaves room for the terminating 0.
                        unsigned int lenS = static_cast<unsigned int>(strlen(s));
                        std::vector<wchar_t> ws(lenS + 1);
#if WCHAR_T_IS_16
                        size_t outLen = UTF16FromUTF8(s, lenS, &ws[0], lenS);
#else
                        size_t outLen = UTF32FromUTF8(s, lenS, reinterpret_cast<unsigned int *>(&ws[0]), lenS);
#endif
                        ws[outLen] = 0;
                        std::wregex regexp;
#if defined(__APPLE__)
                        // Using a UTF-8 locale doesn't change to Unicode over a byte buffer so '.'
                        // is one byte not one character.
                        // However, on OS X this makes wregex act as Unicode
                        std::locale localeU("en_US.UTF-8");
                        regexp.imbue(localeU);
#endif
                        regexp.assign(&ws[0], flagsRe);
                        matched = MatchOnLines<UTF8Iterator>(doc, regexp, resr, search);

                } else {
                        std::regex regexp;
                        regexp.assign(s, flagsRe);
                        matched = MatchOnLines<ByteIterator>(doc, regexp, resr, search);
                }

                int posMatch = -1;
                if (matched) {
                        // Entry 0 of the RESearch results describes the whole match.
                        posMatch = search.bopat[0];
                        *length = search.eopat[0] - search.bopat[0];
                }
                // Example - search in doc/ScintillaHistory.html for
                // [[:upper:]]eta[[:space:]]
                // On MacBook, normally around 1 second but with locale imbued -> 14 seconds.
                //double durSearch = et.Duration(true);
                //Platform::DebugPrintf("Search:%9.6g \n", durSearch);
                return posMatch;
        } catch (std::regex_error &) {
                // Failed to create regular expression
                throw RegexError();
        } catch (...) {
                // Failed in some other way
                return -1;
        }
}
2662
2663 #endif
2664
2665 }
2666
2667 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2668                         bool caseSensitive, bool, bool, int flags,
2669                         int *length) {
2670
2671 #ifdef CXX11_REGEX
2672         if (flags & SCFIND_CXX11REGEX) {
2673                         return Cxx11RegexFindText(doc, minPos, maxPos, s,
2674                         caseSensitive, length, search);
2675         }
2676 #endif
2677
2678         const RESearchRange resr(doc, minPos, maxPos);
2679
2680         const bool posix = (flags & SCFIND_POSIX) != 0;
2681
2682         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2683         if (errmsg) {
2684                 return -1;
2685         }
2686         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2687         // Replace first '.' with '-' in each property file variable reference:
2688         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2689         //     Replace: $(\1-\2)
2690         int pos = -1;
2691         int lenRet = 0;
2692         const char searchEnd = s[*length - 1];
2693         const char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2694         for (int line = resr.lineRangeStart; line != resr.lineRangeBreak; line += resr.increment) {
2695                 int startOfLine = doc->LineStart(line);
2696                 int endOfLine = doc->LineEnd(line);
2697                 if (resr.increment == 1) {
2698                         if (line == resr.lineRangeStart) {
2699                                 if ((resr.startPos != startOfLine) && (s[0] == '^'))
2700                                         continue;       // Can't match start of line if start position after start of line
2701                                 startOfLine = resr.startPos;
2702                         }
2703                         if (line == resr.lineRangeEnd) {
2704                                 if ((resr.endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2705                                         continue;       // Can't match end of line if end position before end of line
2706                                 endOfLine = resr.endPos;
2707                         }
2708                 } else {
2709                         if (line == resr.lineRangeEnd) {
2710                                 if ((resr.endPos != startOfLine) && (s[0] == '^'))
2711                                         continue;       // Can't match start of line if end position after start of line
2712                                 startOfLine = resr.endPos;
2713                         }
2714                         if (line == resr.lineRangeStart) {
2715                                 if ((resr.startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2716                                         continue;       // Can't match end of line if start position before end of line
2717                                 endOfLine = resr.startPos;
2718                         }
2719                 }
2720
2721                 DocumentIndexer di(doc, endOfLine);
2722                 int success = search.Execute(di, startOfLine, endOfLine);
2723                 if (success) {
2724                         pos = search.bopat[0];
2725                         // Ensure only whole characters selected
2726                         search.eopat[0] = doc->MovePositionOutsideChar(search.eopat[0], 1, false);
2727                         lenRet = search.eopat[0] - search.bopat[0];
2728                         // There can be only one start of a line, so no need to look for last match in line
2729                         if ((resr.increment == -1) && (s[0] != '^')) {
2730                                 // Check for the last match on this line.
2731                                 int repetitions = 1000; // Break out of infinite loop
2732                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2733                                         success = search.Execute(di, pos+1, endOfLine);
2734                                         if (success) {
2735                                                 if (search.eopat[0] <= minPos) {
2736                                                         pos = search.bopat[0];
2737                                                         lenRet = search.eopat[0] - search.bopat[0];
2738                                                 } else {
2739                                                         success = 0;
2740                                                 }
2741                                         }
2742                                 }
2743                         }
2744                         break;
2745                 }
2746         }
2747         *length = lenRet;
2748         return pos;
2749 }
2750
2751 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2752         substituted.clear();
2753         DocumentIndexer di(doc, doc->Length());
2754         search.GrabMatches(di);
2755         for (int j = 0; j < *length; j++) {
2756                 if (text[j] == '\\') {
2757                         if (text[j + 1] >= '0' && text[j + 1] <= '9') {
2758                                 unsigned int patNum = text[j + 1] - '0';
2759                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2760                                 if (!search.pat[patNum].empty())        // Will be null if try for a match that did not occur
2761                                         substituted.append(search.pat[patNum].c_str(), len);
2762                                 j++;
2763                         } else {
2764                                 j++;
2765                                 switch (text[j]) {
2766                                 case 'a':
2767                                         substituted.push_back('\a');
2768                                         break;
2769                                 case 'b':
2770                                         substituted.push_back('\b');
2771                                         break;
2772                                 case 'f':
2773                                         substituted.push_back('\f');
2774                                         break;
2775                                 case 'n':
2776                                         substituted.push_back('\n');
2777                                         break;
2778                                 case 'r':
2779                                         substituted.push_back('\r');
2780                                         break;
2781                                 case 't':
2782                                         substituted.push_back('\t');
2783                                         break;
2784                                 case 'v':
2785                                         substituted.push_back('\v');
2786                                         break;
2787                                 case '\\':
2788                                         substituted.push_back('\\');
2789                                         break;
2790                                 default:
2791                                         substituted.push_back('\\');
2792                                         j--;
2793                                 }
2794                         }
2795                 } else {
2796                         substituted.push_back(text[j]);
2797                 }
2798         }
2799         *length = static_cast<int>(substituted.length());
2800         return substituted.c_str();
2801 }
2802
2803 #ifndef SCI_OWNREGEX
2804
2805 #ifdef SCI_NAMESPACE
2806
2807 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2808         return new BuiltinRegex(charClassTable);
2809 }
2810
2811 #else
2812
2813 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2814         return new BuiltinRegex(charClassTable);
2815 }
2816
2817 #endif
2818
2819 #endif