ext/scintilla/src/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <ctype.h>
  12 #include <assert.h>
  13
  14 #include <string>
  15 #include <vector>
  16
  17 #include "Platform.h"
  18
  19 #include "ILexer.h"
  20 #include "Scintilla.h"
  21
  22 #include "SplitVector.h"
  23 #include "Partitioning.h"
  24 #include "RunStyles.h"
  25 #include "CellBuffer.h"
  26 #include "PerLine.h"
  27 #include "CharClassify.h"
  28 #include "CharacterSet.h"
  29 #include "Decoration.h"
  30 #include "Document.h"
  31 #include "RESearch.h"
  32 #include "UniConversion.h"
  33
  34 #ifdef SCI_NAMESPACE
  35 using namespace Scintilla;
  36 #endif
  37
  38 // This is ASCII specific but is safe with chars >= 0x80
  39 static inline bool isspacechar(unsigned char ch) {
  40         return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
  41 }
  42
  43 static inline bool IsPunctuation(char ch) {
  44         return isascii(ch) && ispunct(ch);
  45 }
  46
  47 static inline bool IsADigit(char ch) {
  48         return isascii(ch) && isdigit(ch);
  49 }
  50
  51 static inline bool IsLowerCase(char ch) {
  52         return isascii(ch) && islower(ch);
  53 }
  54
  55 static inline bool IsUpperCase(char ch) {
  56         return isascii(ch) && isupper(ch);
  57 }
  58
  59 void LexInterface::Colourise(int start, int end) {
  60         ElapsedTime et;
  61         if (pdoc && instance && !performingStyle) {
  62                 // Protect against reentrance, which may occur, for example, when
  63                 // fold points are discovered while performing styling and the folding
  64                 // code looks for child lines which may trigger styling.
  65                 performingStyle = true;
  66
  67                 int lengthDoc = pdoc->Length();
  68                 if (end == -1)
  69                         end = lengthDoc;
  70                 int len = end - start;
  71
  72                 PLATFORM_ASSERT(len >= 0);
  73                 PLATFORM_ASSERT(start + len <= lengthDoc);
  74
  75                 int styleStart = 0;
  76                 if (start > 0)
  77                         styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
  78
  79                 if (len > 0) {
  80                         instance->Lex(start, len, styleStart, pdoc);
  81                         instance->Fold(start, len, styleStart, pdoc);
  82                 }
  83
  84                 performingStyle = false;
  85         }
  86 }
  87
  88 Document::Document() {
  89         refCount = 0;
  90 #ifdef _WIN32
  91         eolMode = SC_EOL_CRLF;
  92 #else
  93         eolMode = SC_EOL_LF;
  94 #endif
  95         dbcsCodePage = 0;
  96         stylingBits = 5;
  97         stylingBitsMask = 0x1F;
  98         stylingMask = 0;
  99         endStyled = 0;
 100         styleClock = 0;
 101         enteredModification = 0;
 102         enteredStyling = 0;
 103         enteredReadOnlyCount = 0;
 104         tabInChars = 8;
 105         indentInChars = 0;
 106         actualIndentInChars = 8;
 107         useTabs = true;
 108         tabIndents = true;
 109         backspaceUnindents = false;
 110         watchers = 0;
 111         lenWatchers = 0;
 112
 113         matchesValid = false;
 114         regex = 0;
 115
 116         perLineData[ldMarkers] = new LineMarkers();
 117         perLineData[ldLevels] = new LineLevels();
 118         perLineData[ldState] = new LineState();
 119         perLineData[ldMargin] = new LineAnnotation();
 120         perLineData[ldAnnotation] = new LineAnnotation();
 121
 122         cb.SetPerLine(this);
 123
 124         pli = 0;
 125 }
 126
 127 Document::~Document() {
 128         for (int i = 0; i < lenWatchers; i++) {
 129                 watchers[i].watcher->NotifyDeleted(this, watchers[i].userData);
 130         }
 131         delete []watchers;
 132         for (int j=0; j<ldSize; j++) {
 133                 delete perLineData[j];
 134                 perLineData[j] = 0;
 135         }
 136         watchers = 0;
 137         lenWatchers = 0;
 138         delete regex;
 139         regex = 0;
 140         delete pli;
 141         pli = 0;
 142 }
 143
 144 void Document::Init() {
 145         for (int j=0; j<ldSize; j++) {
 146                 if (perLineData[j])
 147                         perLineData[j]->Init();
 148         }
 149 }
 150
 151 void Document::InsertLine(int line) {
 152         for (int j=0; j<ldSize; j++) {
 153                 if (perLineData[j])
 154                         perLineData[j]->InsertLine(line);
 155         }
 156 }
 157
 158 void Document::RemoveLine(int line) {
 159         for (int j=0; j<ldSize; j++) {
 160                 if (perLineData[j])
 161                         perLineData[j]->RemoveLine(line);
 162         }
 163 }
 164
 165 // Increase reference count and return its previous value.
 166 int Document::AddRef() {
 167         return refCount++;
 168 }
 169
 170 // Decrease reference count and return its previous value.
 171 // Delete the document if reference count reaches zero.
 172 int Document::Release() {
 173         int curRefCount = --refCount;
 174         if (curRefCount == 0)
 175                 delete this;
 176         return curRefCount;
 177 }
 178
// Mark the current state of the cell buffer as the save point and
// notify with NotifySavePoint(true).
void Document::SetSavePoint() {
	cb.SetSavePoint();
	NotifySavePoint(true);
}
 183
 184 int Document::GetMark(int line) {
 185         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 186 }
 187
 188 int Document::AddMark(int line, int markerNum) {
 189         if (line >= 0 && line <= LinesTotal()) {
 190                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 191                         AddMark(line, markerNum, LinesTotal());
 192                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 193                 NotifyModified(mh);
 194                 return prev;
 195         } else {
 196                 return 0;
 197         }
 198 }
 199
 200 void Document::AddMarkSet(int line, int valueSet) {
 201         if (line < 0 || line > LinesTotal()) {
 202                 return;
 203         }
 204         unsigned int m = valueSet;
 205         for (int i = 0; m; i++, m >>= 1)
 206                 if (m & 1)
 207                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 208                                 AddMark(line, i, LinesTotal());
 209         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 210         NotifyModified(mh);
 211 }
 212
 213 void Document::DeleteMark(int line, int markerNum) {
 214         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 215         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 216         NotifyModified(mh);
 217 }
 218
 219 void Document::DeleteMarkFromHandle(int markerHandle) {
 220         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 221         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 222         mh.line = -1;
 223         NotifyModified(mh);
 224 }
 225
 226 void Document::DeleteAllMarks(int markerNum) {
 227         bool someChanges = false;
 228         for (int line = 0; line < LinesTotal(); line++) {
 229                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 230                         someChanges = true;
 231         }
 232         if (someChanges) {
 233                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 234                 mh.line = -1;
 235                 NotifyModified(mh);
 236         }
 237 }
 238
 239 int Document::LineFromHandle(int markerHandle) {
 240         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 241 }
 242
 243 int SCI_METHOD Document::LineStart(int line) const {
 244         return cb.LineStart(line);
 245 }
 246
 247 int Document::LineEnd(int line) const {
 248         if (line == LinesTotal() - 1) {
 249                 return LineStart(line + 1);
 250         } else {
 251                 int position = LineStart(line + 1) - 1;
 252                 // When line terminator is CR+LF, may need to go back one more
 253                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 254                         position--;
 255                 }
 256                 return position;
 257         }
 258 }
 259
 260 void SCI_METHOD Document::SetErrorStatus(int status) {
 261         // Tell the watchers the lexer has changed.
 262         for (int i = 0; i < lenWatchers; i++) {
 263                 watchers[i].watcher->NotifyErrorOccurred(this, watchers[i].userData, status);
 264         }
 265 }
 266
 267 int SCI_METHOD Document::LineFromPosition(int pos) const {
 268         return cb.LineFromPosition(pos);
 269 }
 270
 271 int Document::LineEndPosition(int position) const {
 272         return LineEnd(LineFromPosition(position));
 273 }
 274
 275 bool Document::IsLineEndPosition(int position) const {
 276         return LineEnd(LineFromPosition(position)) == position;
 277 }
 278
 279 int Document::VCHomePosition(int position) const {
 280         int line = LineFromPosition(position);
 281         int startPosition = LineStart(line);
 282         int endLine = LineEnd(line);
 283         int startText = startPosition;
 284         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 285                 startText++;
 286         if (position == startText)
 287                 return startPosition;
 288         else
 289                 return startText;
 290 }
 291
 292 int SCI_METHOD Document::SetLevel(int line, int level) {
 293         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 294         if (prev != level) {
 295                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 296                                    LineStart(line), 0, 0, 0, line);
 297                 mh.foldLevelNow = level;
 298                 mh.foldLevelPrev = prev;
 299                 NotifyModified(mh);
 300         }
 301         return prev;
 302 }
 303
 304 int SCI_METHOD Document::GetLevel(int line) const {
 305         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 306 }
 307
 308 void Document::ClearLevels() {
 309         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 310 }
 311
 312 static bool IsSubordinate(int levelStart, int levelTry) {
 313         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 314                 return true;
 315         else
 316                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 317 }
 318
 319 int Document::GetLastChild(int lineParent, int level, int lastLine) {
 320         if (level == -1)
 321                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 322         int maxLine = LinesTotal();
 323         int lookLastLine = (lastLine != -1) ? Platform::Minimum(LinesTotal() - 1, lastLine) : -1;
 324         int lineMaxSubord = lineParent;
 325         while (lineMaxSubord < maxLine - 1) {
 326                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 327                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 328                         break;
 329                 if ((lookLastLine != -1) && (lineMaxSubord >= lookLastLine) && !(GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG))
 330                         break;
 331                 lineMaxSubord++;
 332         }
 333         if (lineMaxSubord > lineParent) {
 334                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 335                         // Have chewed up some whitespace that belongs to a parent so seek back
 336                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 337                                 lineMaxSubord--;
 338                         }
 339                 }
 340         }
 341         return lineMaxSubord;
 342 }
 343
 344 int Document::GetFoldParent(int line) {
 345         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 346         int lineLook = line - 1;
 347         while ((lineLook > 0) && (
 348                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 349                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 350               ) {
 351                 lineLook--;
 352         }
 353         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 354                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 355                 return lineLook;
 356         } else {
 357                 return -1;
 358         }
 359 }
 360
// Fill highlightDelimiter with the fold block surrounding `line`:
// beginFoldBlock / endFoldBlock give the block's first and last line, and
// firstChangeableLineBefore / firstChangeableLineAfter are computed by the
// scans below. If no enclosing fold header is found, highlightDelimiter is
// cleared instead. lastLine is combined with line to limit GetLastChild's search.
void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int lastLine) {
	int level = GetLevel(line);
	int lookLastLine = Platform::Maximum(line, lastLine) + 1;

	// Starting at `line`, step back over lines flagged as white space and over
	// header lines whose level number is not below that of the line after them.
	int lookLine = line;
	int lookLineLevel = level;
	int lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
	while ((lookLine > 0) && ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) ||
		((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum >= (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))))) {
		lookLineLevel = GetLevel(--lookLine);
		lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
	}

	// The block begins at the line reached if it is a header, otherwise at its fold parent.
	int beginFoldBlock = (lookLineLevel & SC_FOLDLEVELHEADERFLAG) ? lookLine : GetFoldParent(lookLine);
	if (beginFoldBlock == -1) {
		highlightDelimiter.Clear();
		return;
	}

	int endFoldBlock = GetLastChild(beginFoldBlock, -1, lookLastLine);
	int firstChangeableLineBefore = -1;
	if (endFoldBlock < line) {
		// The block found ends before `line`: look further up for a header
		// whose last child is exactly `line` and use that block instead.
		lookLine = beginFoldBlock - 1;
		lookLineLevel = GetLevel(lookLine);
		lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
		while ((lookLine >= 0) && (lookLineLevelNum >= SC_FOLDLEVELBASE)) {
			if (lookLineLevel & SC_FOLDLEVELHEADERFLAG) {
				if (GetLastChild(lookLine, -1, lookLastLine) == line) {
					beginFoldBlock = lookLine;
					endFoldBlock = line;
					firstChangeableLineBefore = line - 1;
				}
			}
			// Stop at a base-level line whose predecessor has a higher level number.
			if ((lookLine > 0) && (lookLineLevelNum == SC_FOLDLEVELBASE) && ((GetLevel(lookLine - 1) & SC_FOLDLEVELNUMBERMASK) > lookLineLevelNum))
				break;
			lookLineLevel = GetLevel(--lookLine);
			lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
		}
	}
	if (firstChangeableLineBefore == -1) {
		// Scan from line - 1 back to the block start for the nearest line that is
		// white space or has a level number above that of `line`.
		for (lookLine = line - 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
			lookLine >= beginFoldBlock;
			lookLineLevel = GetLevel(--lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
			if ((lookLineLevel & SC_FOLDLEVELWHITEFLAG) || (lookLineLevelNum > (level & SC_FOLDLEVELNUMBERMASK))) {
				firstChangeableLineBefore = lookLine;
				break;
			}
		}
	}
	// Nothing found inside the block: fall back to the line before the block.
	if (firstChangeableLineBefore == -1)
		firstChangeableLineBefore = beginFoldBlock - 1;

	// Scan from line + 1 to the block end for the nearest header line whose
	// level number is below that of the line after it.
	int firstChangeableLineAfter = -1;
	for (lookLine = line + 1, lookLineLevel = GetLevel(lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK;
		lookLine <= endFoldBlock;
		lookLineLevel = GetLevel(++lookLine), lookLineLevelNum = lookLineLevel & SC_FOLDLEVELNUMBERMASK) {
		if ((lookLineLevel & SC_FOLDLEVELHEADERFLAG) && (lookLineLevelNum < (GetLevel(lookLine + 1) & SC_FOLDLEVELNUMBERMASK))) {
			firstChangeableLineAfter = lookLine;
			break;
		}
	}
	// Nothing found inside the block: fall back to the line after the block.
	if (firstChangeableLineAfter == -1)
		firstChangeableLineAfter = endFoldBlock + 1;

	highlightDelimiter.beginFoldBlock = beginFoldBlock;
	highlightDelimiter.endFoldBlock = endFoldBlock;
	highlightDelimiter.firstChangeableLineBefore = firstChangeableLineBefore;
	highlightDelimiter.firstChangeableLineAfter = firstChangeableLineAfter;
}
 430
 431 int Document::ClampPositionIntoDocument(int pos) {
 432         return Platform::Clamp(pos, 0, Length());
 433 }
 434
 435 bool Document::IsCrLf(int pos) {
 436         if (pos < 0)
 437                 return false;
 438         if (pos >= (Length() - 1))
 439                 return false;
 440         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 441 }
 442
 443 int Document::LenChar(int pos) {
 444         if (pos < 0) {
 445                 return 1;
 446         } else if (IsCrLf(pos)) {
 447                 return 2;
 448         } else if (SC_CP_UTF8 == dbcsCodePage) {
 449                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 450                 if (ch < 0x80)
 451                         return 1;
 452                 int len = 2;
 453                 if (ch >= (0x80 + 0x40 + 0x20 + 0x10))
 454                         len = 4;
 455                 else if (ch >= (0x80 + 0x40 + 0x20))
 456                         len = 3;
 457                 int lengthDoc = Length();
 458                 if ((pos + len) > lengthDoc)
 459                         return lengthDoc -pos;
 460                 else
 461                         return len;
 462         } else if (dbcsCodePage) {
 463                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 464         } else {
 465                 return 1;
 466         }
 467 }
 468
 469 static bool IsTrailByte(int ch) {
 470         return (ch >= 0x80) && (ch < (0x80 + 0x40));
 471 }
 472
 473 static int BytesFromLead(int leadByte) {
 474         if (leadByte > 0xF4) {
 475                 // Characters longer than 4 bytes not possible in current UTF-8
 476                 return 0;
 477         } else if (leadByte >= 0xF0) {
 478                 return 4;
 479         } else if (leadByte >= 0xE0) {
 480                 return 3;
 481         } else if (leadByte >= 0xC2) {
 482                 return 2;
 483         }
 484         return 0;
 485 }
 486
// Check whether pos lies on a trail byte of a UTF-8 sequence that has a usable
// lead byte and enough trail bytes.
// On success returns true with start set to the position of the lead byte and
// end set to start plus the sequence length. start is always written;
// end is written only when the result is true.
bool Document::InGoodUTF8(int pos, int &start, int &end) const {
	// Walk back over trail bytes preceding pos, at most 4 steps.
	int lead = pos;
	while ((lead>0) && (pos-lead < 4) && IsTrailByte(static_cast<unsigned char>(cb.CharAt(lead-1))))
		lead--;
	// The candidate lead byte is the byte before the run of trail bytes.
	start = 0;
	if (lead > 0) {
		start = lead-1;
	}
	int leadByte = static_cast<unsigned char>(cb.CharAt(start));
	int bytes = BytesFromLead(leadByte);
	if (bytes == 0) {
		// Not a byte that can begin a multi-byte sequence.
		return false;
	} else {
		int trailBytes = bytes - 1;
		// Number of trail bytes from the first one after the lead up to and including pos.
		int len = pos - lead + 1;
		if (len > trailBytes)
			// pos too far from lead
			return false;
		// Check that there are enough trails for this lead
		int trail = pos + 1;
		while ((trail-lead<trailBytes) && (trail < Length())) {
			if (!IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail)))) {
				return false;
			}
			trail++;
		}
		end = start + bytes;
		return true;
	}
}
 517
// Normalise a position so that it is not halfway through a two byte character.
// This can occur in two situations -
// When lines are terminated with \r\n pairs which should be treated as one character.
// When displaying DBCS text such as Japanese.
// If moving, move the position in the indicated direction.
// pos: position to normalise; values outside the document are clamped to 0 or Length().
// moveDir: when > 0 a position inside a character moves to the character's end,
//   otherwise it moves to the character's start.
// checkLineEnd: when true, a position between '\r' and '\n' is also moved.
// Returns the normalised position.
int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
	//Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
	// If out of range, just return minimum/maximum value.
	if (pos <= 0)
		return 0;
	if (pos >= Length())
		return Length();

	// PLATFORM_ASSERT(pos > 0 && pos < Length());
	// pos sits between the '\r' and '\n' of a pair when the pair starts at pos - 1.
	if (checkLineEnd && IsCrLf(pos - 1)) {
		if (moveDir > 0)
			return pos + 1;
		else
			return pos - 1;
	}

	if (dbcsCodePage) {
		if (SC_CP_UTF8 == dbcsCodePage) {
			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
			int startUTF = pos;
			int endUTF = pos;
			// Only move when pos is on a trail byte of a sequence accepted by InGoodUTF8;
			// otherwise pos is left unchanged.
			if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
				// ch is a trail byte within a UTF-8 character
				if (moveDir > 0)
					pos = endUTF;
				else
					pos = startUTF;
			}
		} else {
			// Anchor DBCS calculations at start of line because start of line can
			// not be a DBCS trail byte.
			int posStartLine = LineStart(LineFromPosition(pos));
			if (pos == posStartLine)
				return pos;

			// Step back until a non-lead-byte is found.
			int posCheck = pos;
			while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
				posCheck--;

			// Check from known start of character.
			while (posCheck < pos) {
				int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
				if (posCheck + mbsize == pos) {
					// pos is on a character boundary.
					return pos;
				} else if (posCheck + mbsize > pos) {
					// pos is inside the character starting at posCheck.
					if (moveDir > 0) {
						return posCheck + mbsize;
					} else {
						return posCheck;
					}
				}
				posCheck += mbsize;
			}
		}
	}

	return pos;
}
 582
// NextPosition moves between valid positions - it can not handle a position in the middle of a
// multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
// A \r\n pair is treated as two characters.
// pos: a valid position to start from.
// moveDir: > 0 moves forwards by one character, anything else moves backwards.
// Returns the new position, limited to the range 0..Length().
int Document::NextPosition(int pos, int moveDir) const {
	// If out of range, just return minimum/maximum value.
	int increment = (moveDir > 0) ? 1 : -1;
	if (pos + increment <= 0)
		return 0;
	if (pos + increment >= Length())
		return Length();

	if (dbcsCodePage) {
		if (SC_CP_UTF8 == dbcsCodePage) {
			// Step one byte, then, if that lands on a trail byte of a sequence
			// accepted by InGoodUTF8, continue to the sequence's end or start.
			pos += increment;
			unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
			int startUTF = pos;
			int endUTF = pos;
			if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
				// ch is a trail byte within a UTF-8 character
				if (moveDir > 0)
					pos = endUTF;
				else
					pos = startUTF;
			}
		} else {
			if (moveDir > 0) {
				// Forwards: skip 2 bytes for a lead byte, 1 otherwise, without passing the end.
				int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
				pos += mbsize;
				if (pos > Length())
					pos = Length();
			} else {
				// Anchor DBCS calculations at start of line because start of line can
				// not be a DBCS trail byte.
				int posStartLine = LineStart(LineFromPosition(pos));
				// See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
				// http://msdn.microsoft.com/en-us/library/cc194790.aspx
				if ((pos - 1) <= posStartLine) {
					return pos - 1;
				} else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
					// Must actually be trail byte
					return pos - 2;
				} else {
					// Otherwise, step back until a non-lead-byte is found.
					int posTemp = pos - 1;
					while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
						;
					// Now posTemp+1 must point to the beginning of a character,
					// so figure out whether we went back an even or an odd
					// number of bytes and go back 1 or 2 bytes, respectively.
					return (pos - 1 - ((pos - posTemp) & 1));
				}
			}
		}
	} else {
		// Single byte encoding: every byte is a character.
		pos += increment;
	}

	return pos;
}
 642
 643 bool Document::NextCharacter(int &pos, int moveDir) {
 644         // Returns true if pos changed
 645         int posNext = NextPosition(pos, moveDir);
 646         if (posNext == pos) {
 647                 return false;
 648         } else {
 649                 pos = posNext;
 650                 return true;
 651         }
 652 }
 653
// Return the document's code page setting (dbcsCodePage); the constructor
// initialises it to 0 and other code compares it with SC_CP_UTF8.
int SCI_METHOD Document::CodePage() const {
	return dbcsCodePage;
}
 657
 658 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 659         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 660         unsigned char uch = static_cast<unsigned char>(ch);
 661         switch (dbcsCodePage) {
 662                 case 932:
 663                         // Shift_jis
 664                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 665                                 ((uch >= 0xE0) && (uch <= 0xFC));
 666                                 // Lead bytes F0 to FC may be a Microsoft addition.
 667                 case 936:
 668                         // GBK
 669                         return (uch >= 0x81) && (uch <= 0xFE);
 670                 case 949:
 671                         // Korean Wansung KS C-5601-1987
 672                         return (uch >= 0x81) && (uch <= 0xFE);
 673                 case 950:
 674                         // Big5
 675                         return (uch >= 0x81) && (uch <= 0xFE);
 676                 case 1361:
 677                         // Korean Johab KS C-5601-1992
 678                         return
 679                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 680                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 681                                 ((uch >= 0xE0) && (uch <= 0xF9));
 682         }
 683         return false;
 684 }
 685
 686 inline bool IsSpaceOrTab(int ch) {
 687         return ch == ' ' || ch == '\t';
 688 }
 689
 690 // Need to break text into segments near lengthSegment but taking into
 691 // account the encoding to not break inside a UTF-8 or DBCS character
 692 // and also trying to avoid breaking inside a pair of combining characters.
 693 // The segment length must always be long enough (more than 4 bytes)
 694 // so that there will be at least one whole character to make a segment.
 695 // For UTF-8, text must consist only of valid whole characters.
 696 // In preference order from best to worst:
 697 //   1) Break after space
 698 //   2) Break before punctuation
 699 //   3) Break after whole character
 700
 701 int Document::SafeSegment(const char *text, int length, int lengthSegment) {
 702         if (length <= lengthSegment)
 703                 return length;
 704         int lastSpaceBreak = -1;
 705         int lastPunctuationBreak = -1;
 706         int lastEncodingAllowedBreak = -1;
 707         for (int j=0; j < lengthSegment;) {
 708                 unsigned char ch = static_cast<unsigned char>(text[j]);
 709                 if (j > 0) {
 710                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 711                                 lastSpaceBreak = j;
 712                         }
 713                         if (ch < 'A') {
 714                                 lastPunctuationBreak = j;
 715                         }
 716                 }
 717                 lastEncodingAllowedBreak = j;
 718
 719                 if (dbcsCodePage == SC_CP_UTF8) {
 720                         j += (ch < 0x80) ? 1 : BytesFromLead(ch);
 721                 } else if (dbcsCodePage) {
 722                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 723                 } else {
 724                         j++;
 725                 }
 726         }
 727         if (lastSpaceBreak >= 0) {
 728                 return lastSpaceBreak;
 729         } else if (lastPunctuationBreak >= 0) {
 730                 return lastPunctuationBreak;
 731         }
 732         return lastEncodingAllowedBreak;
 733 }
 734
 735 void Document::ModifiedAt(int pos) {
 736         if (endStyled > pos)
 737                 endStyled = pos;
 738 }
 739
 740 void Document::CheckReadOnly() {
 741         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 742                 enteredReadOnlyCount++;
 743                 NotifyModifyAttempt();
 744                 enteredReadOnlyCount--;
 745         }
 746 }
 747
 748 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 749 // SetStyleAt does not change the persistent state of a document
 750
 751 bool Document::DeleteChars(int pos, int len) {
 752         if (len == 0)
 753                 return false;
 754         if ((pos + len) > Length())
 755                 return false;
 756         CheckReadOnly();
 757         if (enteredModification != 0) {
 758                 return false;
 759         } else {
 760                 enteredModification++;
 761                 if (!cb.IsReadOnly()) {
 762                         NotifyModified(
 763                             DocModification(
 764                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 765                                 pos, len,
 766                                 0, 0));
 767                         int prevLinesTotal = LinesTotal();
 768                         bool startSavePoint = cb.IsSavePoint();
 769                         bool startSequence = false;
 770                         const char *text = cb.DeleteChars(pos, len, startSequence);
 771                         if (startSavePoint && cb.IsCollectingUndo())
 772                                 NotifySavePoint(!startSavePoint);
 773                         if ((pos < Length()) || (pos == 0))
 774                                 ModifiedAt(pos);
 775                         else
 776                                 ModifiedAt(pos-1);
 777                         NotifyModified(
 778                             DocModification(
 779                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 780                                 pos, len,
 781                                 LinesTotal() - prevLinesTotal, text));
 782                 }
 783                 enteredModification--;
 784         }
 785         return !cb.IsReadOnly();
 786 }
 787
 788 /**
 789  * Insert a string with a length.
 790  */
 791 bool Document::InsertString(int position, const char *s, int insertLength) {
 792         if (insertLength <= 0) {
 793                 return false;
 794         }
 795         CheckReadOnly();
 796         if (enteredModification != 0) {
 797                 return false;
 798         } else {
 799                 enteredModification++;
 800                 if (!cb.IsReadOnly()) {
 801                         NotifyModified(
 802                             DocModification(
 803                                 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
 804                                 position, insertLength,
 805                                 0, s));
 806                         int prevLinesTotal = LinesTotal();
 807                         bool startSavePoint = cb.IsSavePoint();
 808                         bool startSequence = false;
 809                         const char *text = cb.InsertString(position, s, insertLength, startSequence);
 810                         if (startSavePoint && cb.IsCollectingUndo())
 811                                 NotifySavePoint(!startSavePoint);
 812                         ModifiedAt(position);
 813                         NotifyModified(
 814                             DocModification(
 815                                 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 816                                 position, insertLength,
 817                                 LinesTotal() - prevLinesTotal, text));
 818                 }
 819                 enteredModification--;
 820         }
 821         return !cb.IsReadOnly();
 822 }
 823
 824 int Document::Undo() {
 825         int newPos = -1;
 826         CheckReadOnly();
 827         if (enteredModification == 0) {
 828                 enteredModification++;
 829                 if (!cb.IsReadOnly()) {
 830                         bool startSavePoint = cb.IsSavePoint();
 831                         bool multiLine = false;
 832                         int steps = cb.StartUndo();
 833                         //Platform::DebugPrintf("Steps=%d\n", steps);
 834                         for (int step = 0; step < steps; step++) {
 835                                 const int prevLinesTotal = LinesTotal();
 836                                 const Action &action = cb.GetUndoStep();
 837                                 if (action.at == removeAction) {
 838                                         NotifyModified(DocModification(
 839                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
 840                                 } else if (action.at == containerAction) {
 841                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
 842                                         dm.token = action.position;
 843                                         NotifyModified(dm);
 844                                 } else {
 845                                         NotifyModified(DocModification(
 846                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
 847                                 }
 848                                 cb.PerformUndoStep();
 849                                 int cellPosition = action.position;
 850                                 if (action.at != containerAction) {
 851                                         ModifiedAt(cellPosition);
 852                                         newPos = cellPosition;
 853                                 }
 854
 855                                 int modFlags = SC_PERFORMED_UNDO;
 856                                 // With undo, an insertion action becomes a deletion notification
 857                                 if (action.at == removeAction) {
 858                                         newPos += action.lenData;
 859                                         modFlags |= SC_MOD_INSERTTEXT;
 860                                 } else if (action.at == insertAction) {
 861                                         modFlags |= SC_MOD_DELETETEXT;
 862                                 }
 863                                 if (steps > 1)
 864                                         modFlags |= SC_MULTISTEPUNDOREDO;
 865                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 866                                 if (linesAdded != 0)
 867                                         multiLine = true;
 868                                 if (step == steps - 1) {
 869                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 870                                         if (multiLine)
 871                                                 modFlags |= SC_MULTILINEUNDOREDO;
 872                                 }
 873                                 NotifyModified(DocModification(modFlags, cellPosition, action.lenData,
 874                                                                                            linesAdded, action.data));
 875                         }
 876
 877                         bool endSavePoint = cb.IsSavePoint();
 878                         if (startSavePoint != endSavePoint)
 879                                 NotifySavePoint(endSavePoint);
 880                 }
 881                 enteredModification--;
 882         }
 883         return newPos;
 884 }
 885
 886 int Document::Redo() {
 887         int newPos = -1;
 888         CheckReadOnly();
 889         if (enteredModification == 0) {
 890                 enteredModification++;
 891                 if (!cb.IsReadOnly()) {
 892                         bool startSavePoint = cb.IsSavePoint();
 893                         bool multiLine = false;
 894                         int steps = cb.StartRedo();
 895                         for (int step = 0; step < steps; step++) {
 896                                 const int prevLinesTotal = LinesTotal();
 897                                 const Action &action = cb.GetRedoStep();
 898                                 if (action.at == insertAction) {
 899                                         NotifyModified(DocModification(
 900                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
 901                                 } else if (action.at == containerAction) {
 902                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
 903                                         dm.token = action.position;
 904                                         NotifyModified(dm);
 905                                 } else {
 906                                         NotifyModified(DocModification(
 907                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
 908                                 }
 909                                 cb.PerformRedoStep();
 910                                 if (action.at != containerAction) {
 911                                         ModifiedAt(action.position);
 912                                         newPos = action.position;
 913                                 }
 914
 915                                 int modFlags = SC_PERFORMED_REDO;
 916                                 if (action.at == insertAction) {
 917                                         newPos += action.lenData;
 918                                         modFlags |= SC_MOD_INSERTTEXT;
 919                                 } else if (action.at == removeAction) {
 920                                         modFlags |= SC_MOD_DELETETEXT;
 921                                 }
 922                                 if (steps > 1)
 923                                         modFlags |= SC_MULTISTEPUNDOREDO;
 924                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 925                                 if (linesAdded != 0)
 926                                         multiLine = true;
 927                                 if (step == steps - 1) {
 928                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 929                                         if (multiLine)
 930                                                 modFlags |= SC_MULTILINEUNDOREDO;
 931                                 }
 932                                 NotifyModified(
 933                                         DocModification(modFlags, action.position, action.lenData,
 934                                                                         linesAdded, action.data));
 935                         }
 936
 937                         bool endSavePoint = cb.IsSavePoint();
 938                         if (startSavePoint != endSavePoint)
 939                                 NotifySavePoint(endSavePoint);
 940                 }
 941                 enteredModification--;
 942         }
 943         return newPos;
 944 }
 945
 946 /**
 947  * Insert a single character.
 948  */
 949 bool Document::InsertChar(int pos, char ch) {
 950         char chs[1];
 951         chs[0] = ch;
 952         return InsertString(pos, chs, 1);
 953 }
 954
 955 /**
 956  * Insert a null terminated string.
 957  */
 958 bool Document::InsertCString(int position, const char *s) {
 959         return InsertString(position, s, static_cast<int>(strlen(s)));
 960 }
 961
 962 void Document::ChangeChar(int pos, char ch) {
 963         DeleteChars(pos, 1);
 964         InsertChar(pos, ch);
 965 }
 966
 967 void Document::DelChar(int pos) {
 968         DeleteChars(pos, LenChar(pos));
 969 }
 970
 971 void Document::DelCharBack(int pos) {
 972         if (pos <= 0) {
 973                 return;
 974         } else if (IsCrLf(pos - 2)) {
 975                 DeleteChars(pos - 2, 2);
 976         } else if (dbcsCodePage) {
 977                 int startChar = NextPosition(pos, -1);
 978                 DeleteChars(startChar, pos - startChar);
 979         } else {
 980                 DeleteChars(pos - 1, 1);
 981         }
 982 }
 983
 984 static bool isindentchar(char ch) {
 985         return (ch == ' ') || (ch == '\t');
 986 }
 987
 988 static int NextTab(int pos, int tabSize) {
 989         return ((pos / tabSize) + 1) * tabSize;
 990 }
 991
 992 static void CreateIndentation(char *linebuf, int length, int indent, int tabSize, bool insertSpaces) {
 993         length--;       // ensure space for \0
 994         if (!insertSpaces) {
 995                 while ((indent >= tabSize) && (length > 0)) {
 996                         *linebuf++ = '\t';
 997                         indent -= tabSize;
 998                         length--;
 999                 }
1000         }
1001         while ((indent > 0) && (length > 0)) {
1002                 *linebuf++ = ' ';
1003                 indent--;
1004                 length--;
1005         }
1006         *linebuf = '\0';
1007 }
1008
1009 int SCI_METHOD Document::GetLineIndentation(int line) {
1010         int indent = 0;
1011         if ((line >= 0) && (line < LinesTotal())) {
1012                 int lineStart = LineStart(line);
1013                 int length = Length();
1014                 for (int i = lineStart; i < length; i++) {
1015                         char ch = cb.CharAt(i);
1016                         if (ch == ' ')
1017                                 indent++;
1018                         else if (ch == '\t')
1019                                 indent = NextTab(indent, tabInChars);
1020                         else
1021                                 return indent;
1022                 }
1023         }
1024         return indent;
1025 }
1026
1027 void Document::SetLineIndentation(int line, int indent) {
1028         int indentOfLine = GetLineIndentation(line);
1029         if (indent < 0)
1030                 indent = 0;
1031         if (indent != indentOfLine) {
1032                 char linebuf[1000];
1033                 CreateIndentation(linebuf, sizeof(linebuf), indent, tabInChars, !useTabs);
1034                 int thisLineStart = LineStart(line);
1035                 int indentPos = GetLineIndentPosition(line);
1036                 UndoGroup ug(this);
1037                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1038                 InsertCString(thisLineStart, linebuf);
1039         }
1040 }
1041
1042 int Document::GetLineIndentPosition(int line) const {
1043         if (line < 0)
1044                 return 0;
1045         int pos = LineStart(line);
1046         int length = Length();
1047         while ((pos < length) && isindentchar(cb.CharAt(pos))) {
1048                 pos++;
1049         }
1050         return pos;
1051 }
1052
1053 int Document::GetColumn(int pos) {
1054         int column = 0;
1055         int line = LineFromPosition(pos);
1056         if ((line >= 0) && (line < LinesTotal())) {
1057                 for (int i = LineStart(line); i < pos;) {
1058                         char ch = cb.CharAt(i);
1059                         if (ch == '\t') {
1060                                 column = NextTab(column, tabInChars);
1061                                 i++;
1062                         } else if (ch == '\r') {
1063                                 return column;
1064                         } else if (ch == '\n') {
1065                                 return column;
1066                         } else if (i >= Length()) {
1067                                 return column;
1068                         } else {
1069                                 column++;
1070                                 i = NextPosition(i, 1);
1071                         }
1072                 }
1073         }
1074         return column;
1075 }
1076
1077 int Document::FindColumn(int line, int column) {
1078         int position = LineStart(line);
1079         if ((line >= 0) && (line < LinesTotal())) {
1080                 int columnCurrent = 0;
1081                 while ((columnCurrent < column) && (position < Length())) {
1082                         char ch = cb.CharAt(position);
1083                         if (ch == '\t') {
1084                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1085                                 position++;
1086                         } else if (ch == '\r') {
1087                                 return position;
1088                         } else if (ch == '\n') {
1089                                 return position;
1090                         } else {
1091                                 columnCurrent++;
1092                                 position = NextPosition(position, 1);
1093                         }
1094                 }
1095         }
1096         return position;
1097 }
1098
1099 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1100         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1101         for (int line = lineBottom; line >= lineTop; line--) {
1102                 int indentOfLine = GetLineIndentation(line);
1103                 if (forwards) {
1104                         if (LineStart(line) < LineEnd(line)) {
1105                                 SetLineIndentation(line, indentOfLine + IndentSize());
1106                         }
1107                 } else {
1108                         SetLineIndentation(line, indentOfLine - IndentSize());
1109                 }
1110         }
1111 }
1112
1113 // Convert line endings for a piece of text to a particular mode.
1114 // Stop at len or when a NUL is found.
1115 // Caller must delete the returned pointer.
1116 char *Document::TransformLineEnds(int *pLenOut, const char *s, size_t len, int eolModeWanted) {
1117         char *dest = new char[2 * len + 1];
1118         const char *sptr = s;
1119         char *dptr = dest;
1120         for (size_t i = 0; (i < len) && (*sptr != '\0'); i++) {
1121                 if (*sptr == '\n' || *sptr == '\r') {
1122                         if (eolModeWanted == SC_EOL_CR) {
1123                                 *dptr++ = '\r';
1124                         } else if (eolModeWanted == SC_EOL_LF) {
1125                                 *dptr++ = '\n';
1126                         } else { // eolModeWanted == SC_EOL_CRLF
1127                                 *dptr++ = '\r';
1128                                 *dptr++ = '\n';
1129                         }
1130                         if ((*sptr == '\r') && (i+1 < len) && (*(sptr+1) == '\n')) {
1131                                 i++;
1132                                 sptr++;
1133                         }
1134                         sptr++;
1135                 } else {
1136                         *dptr++ = *sptr++;
1137                 }
1138         }
1139         *dptr++ = '\0';
1140         *pLenOut = (dptr - dest) - 1;
1141         return dest;
1142 }
1143
1144 void Document::ConvertLineEnds(int eolModeSet) {
1145         UndoGroup ug(this);
1146
1147         for (int pos = 0; pos < Length(); pos++) {
1148                 if (cb.CharAt(pos) == '\r') {
1149                         if (cb.CharAt(pos + 1) == '\n') {
1150                                 // CRLF
1151                                 if (eolModeSet == SC_EOL_CR) {
1152                                         DeleteChars(pos + 1, 1); // Delete the LF
1153                                 } else if (eolModeSet == SC_EOL_LF) {
1154                                         DeleteChars(pos, 1); // Delete the CR
1155                                 } else {
1156                                         pos++;
1157                                 }
1158                         } else {
1159                                 // CR
1160                                 if (eolModeSet == SC_EOL_CRLF) {
1161                                         InsertString(pos + 1, "\n", 1); // Insert LF
1162                                         pos++;
1163                                 } else if (eolModeSet == SC_EOL_LF) {
1164                                         InsertString(pos, "\n", 1); // Insert LF
1165                                         DeleteChars(pos + 1, 1); // Delete CR
1166                                 }
1167                         }
1168                 } else if (cb.CharAt(pos) == '\n') {
1169                         // LF
1170                         if (eolModeSet == SC_EOL_CRLF) {
1171                                 InsertString(pos, "\r", 1); // Insert CR
1172                                 pos++;
1173                         } else if (eolModeSet == SC_EOL_CR) {
1174                                 InsertString(pos, "\r", 1); // Insert CR
1175                                 DeleteChars(pos + 1, 1); // Delete LF
1176                         }
1177                 }
1178         }
1179
1180 }
1181
1182 bool Document::IsWhiteLine(int line) const {
1183         int currentChar = LineStart(line);
1184         int endLine = LineEnd(line);
1185         while (currentChar < endLine) {
1186                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1187                         return false;
1188                 }
1189                 ++currentChar;
1190         }
1191         return true;
1192 }
1193
1194 int Document::ParaUp(int pos) {
1195         int line = LineFromPosition(pos);
1196         line--;
1197         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1198                 line--;
1199         }
1200         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1201                 line--;
1202         }
1203         line++;
1204         return LineStart(line);
1205 }
1206
1207 int Document::ParaDown(int pos) {
1208         int line = LineFromPosition(pos);
1209         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1210                 line++;
1211         }
1212         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1213                 line++;
1214         }
1215         if (line < LinesTotal())
1216                 return LineStart(line);
1217         else // end of a document
1218                 return LineEnd(line-1);
1219 }
1220
1221 CharClassify::cc Document::WordCharClass(unsigned char ch) {
1222         if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80))
1223                 return CharClassify::ccWord;
1224         return charClass.GetClass(ch);
1225 }
1226
1227 /**
1228  * Used by commmands that want to select whole words.
1229  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1230  */
1231 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1232         CharClassify::cc ccStart = CharClassify::ccWord;
1233         if (delta < 0) {
1234                 if (!onlyWordCharacters)
1235                         ccStart = WordCharClass(cb.CharAt(pos-1));
1236                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1237                         pos--;
1238         } else {
1239                 if (!onlyWordCharacters && pos < Length())
1240                         ccStart = WordCharClass(cb.CharAt(pos));
1241                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1242                         pos++;
1243         }
1244         return MovePositionOutsideChar(pos, delta, true);
1245 }
1246
1247 /**
1248  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1249  * (delta < 0).
1250  * This is looking for a transition between character classes although there is also some
1251  * additional movement to transit white space.
1252  * Used by cursor movement by word commands.
1253  */
1254 int Document::NextWordStart(int pos, int delta) {
1255         if (delta < 0) {
1256                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1257                         pos--;
1258                 if (pos > 0) {
1259                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1260                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1261                                 pos--;
1262                         }
1263                 }
1264         } else {
1265                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1266                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1267                         pos++;
1268                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1269                         pos++;
1270         }
1271         return pos;
1272 }
1273
1274 /**
1275  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1276  * (delta < 0).
1277  * This is looking for a transition between character classes although there is also some
1278  * additional movement to transit white space.
1279  * Used by cursor movement by word commands.
1280  */
1281 int Document::NextWordEnd(int pos, int delta) {
1282         if (delta < 0) {
1283                 if (pos > 0) {
1284                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1285                         if (ccStart != CharClassify::ccSpace) {
1286                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1287                                         pos--;
1288                                 }
1289                         }
1290                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1291                                 pos--;
1292                         }
1293                 }
1294         } else {
1295                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1296                         pos++;
1297                 }
1298                 if (pos < Length()) {
1299                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1300                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1301                                 pos++;
1302                         }
1303                 }
1304         }
1305         return pos;
1306 }
1307
1308 /**
1309  * Check that the character at the given position is a word or punctuation character and that
1310  * the previous character is of a different character class.
1311  */
1312 bool Document::IsWordStartAt(int pos) {
1313         if (pos > 0) {
1314                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1315                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1316                         (ccPos != WordCharClass(CharAt(pos - 1)));
1317         }
1318         return true;
1319 }
1320
1321 /**
1322  * Check that the character at the given position is a word or punctuation character and that
1323  * the next character is of a different character class.
1324  */
1325 bool Document::IsWordEndAt(int pos) {
1326         if (pos < Length()) {
1327                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1328                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1329                         (ccPrev != WordCharClass(CharAt(pos)));
1330         }
1331         return true;
1332 }
1333
1334 /**
1335  * Check that the given range is has transitions between character classes at both
1336  * ends and where the characters on the inside are word or punctuation characters.
1337  */
1338 bool Document::IsWordAt(int start, int end) {
1339         return IsWordStartAt(start) && IsWordEndAt(end);
1340 }
1341
// Map 'A'..'Z' onto 'a'..'z' and return every other value unchanged.
static inline char MakeLowerCase(char ch) {
	const bool upperAscii = (ch >= 'A') && (ch <= 'Z');
	return upperAscii ? static_cast<char>(ch - 'A' + 'a') : ch;
}
1348
// True when v lies in the range 0x80 to 0xbf inclusive.
static bool GoodTrailByte(int v) {
	const bool tooLow = v < 0x80;
	const bool tooHigh = v >= 0xc0;
	return !(tooLow || tooHigh);
}
1352
1353 size_t Document::ExtractChar(int pos, char *bytes) {
1354         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
1355         size_t widthChar = UTF8CharLength(ch);
1356         bytes[0] = ch;
1357         for (size_t i=1; i<widthChar; i++) {
1358                 bytes[i] = cb.CharAt(static_cast<int>(pos+i));
1359                 if (!GoodTrailByte(static_cast<unsigned char>(bytes[i]))) { // Bad byte
1360                         widthChar = 1;
1361                 }
1362         }
1363         return widthChar;
1364 }
1365
1366 CaseFolderTable::CaseFolderTable() {
1367         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1368                 mapping[iChar] = static_cast<char>(iChar);
1369         }
1370 }
1371
1372 CaseFolderTable::~CaseFolderTable() {
1373 }
1374
1375 size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
1376         if (lenMixed > sizeFolded) {
1377                 return 0;
1378         } else {
1379                 for (size_t i=0; i<lenMixed; i++) {
1380                         folded[i] = mapping[static_cast<unsigned char>(mixed[i])];
1381                 }
1382                 return lenMixed;
1383         }
1384 }
1385
1386 void CaseFolderTable::SetTranslation(char ch, char chTranslation) {
1387         mapping[static_cast<unsigned char>(ch)] = chTranslation;
1388 }
1389
1390 void CaseFolderTable::StandardASCII() {
1391         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1392                 if (iChar >= 'A' && iChar <= 'Z') {
1393                         mapping[iChar] = static_cast<char>(iChar - 'A' + 'a');
1394                 } else {
1395                         mapping[iChar] = static_cast<char>(iChar);
1396                 }
1397         }
1398 }
1399
1400 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) {
1401         return (!word && !wordStart) ||
1402                         (word && IsWordAt(pos, pos + length)) ||
1403                         (wordStart && IsWordStartAt(pos));
1404 }
1405
/**
 * Find text in document, supporting both forward and backward
 * searches (just pass minPos > maxPos to do a backward search)
 * Has not been tested with backwards DBCS searches yet.
 *
 * @param minPos        Position where the search starts.
 * @param maxPos        Position where the search stops; smaller than minPos for a backward search.
 * @param search        Text (or regular expression when regExp is set) to look for.
 * @param caseSensitive When false the search text and the document are folded with pcf before comparing.
 * @param word          Passed to MatchesWordOptions to filter candidate matches.
 * @param wordStart     Passed to MatchesWordOptions to filter candidate matches.
 * @param regExp        When true the whole search is delegated to the regex object.
 * @param flags         Only forwarded to the regex object; unused by the plain text branches.
 * @param length        In: length of search. Out: in the UTF-8 and DBCS case-insensitive branches
 *                      it receives the number of document bytes matched.
 * @param pcf           Case folder; dereferenced without a null check in every case-insensitive branch.
 * @return Position of the match, -1 when nothing is found, or minPos when *length <= 0.
 */
long Document::FindText(int minPos, int maxPos, const char *search,
                        bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
                        int *length, CaseFolder *pcf) {
        // Nothing to search for: hand back the start position.
        if (*length <= 0)
                return minPos;
        if (regExp) {
                // The regex object is created on first use and then performs the entire search.
                if (!regex)
                        regex = CreateRegexSearch(&charClass);
                return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
        } else {

                const bool forward = minPos <= maxPos;
                const int increment = forward ? 1 : -1;

                // Range endpoints should not be inside DBCS characters, but just in case, move them.
                const int startPos = MovePositionOutsideChar(minPos, increment, false);
                const int endPos = MovePositionOutsideChar(maxPos, increment, false);

                // Compute actual search ranges needed
                // NOTE(review): *length <= 0 has already returned above, so the -1 (strlen) case
                // looks unreachable from here - confirm whether callers still rely on it.
                const int lengthFind = (*length == -1) ? static_cast<int>(strlen(search)) : *length;

                //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
                // Upper end of the range regardless of search direction; matches may not extend past it.
                const int limitPos = Platform::Maximum(startPos, endPos);
                int pos = startPos;
                if (!forward) {
                        // Back all of a character
                        pos = NextPosition(pos, increment);
                }
                if (caseSensitive) {
                        // Plain byte comparison. *length is not modified in this branch.
                        // Forward: stop at the last position where lengthFind bytes still fit before endPos.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                bool found = (pos + lengthFind) <= limitPos;
                                for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
                                        found = CharAt(pos + indexSearch) == search[indexSearch];
                                }
                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                        return pos;
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else if (SC_CP_UTF8 == dbcsCodePage) {
                        // Case-insensitive UTF-8: fold the search text once, then fold the document
                        // one character at a time and compare the folded bytes.
                        const size_t maxBytesCharacter = 4;
                        const size_t maxFoldingExpansion = 4;
                        // Buffer sized as lengthFind * maxBytesCharacter * maxFoldingExpansion plus a terminator.
                        std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int widthFirstCharacter = 0;
                                int indexDocument = 0;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                while (characterMatches &&
                                        ((pos + indexDocument) < limitPos) &&
                                        (indexSearch < lenSearch)) {
                                        char bytes[maxBytesCharacter + 1];
                                        bytes[maxBytesCharacter] = 0;
                                        const int widthChar = static_cast<int>(ExtractChar(pos + indexDocument, bytes));
                                        // Remember the width of the character at pos for stepping forward afterwards.
                                        if (!widthFirstCharacter)
                                                widthFirstCharacter = widthChar;
                                        // A character that would cross limitPos cannot be part of a match.
                                        if ((pos + indexDocument + widthChar) > limitPos)
                                                break;
                                        char folded[maxBytesCharacter * maxFoldingExpansion + 1];
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        indexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                // Report how many document bytes the match consumed.
                                                *length = indexDocument;
                                                return pos;
                                        }
                                }
                                if (forward) {
                                        // Step over the first character examined at this position.
                                        pos += widthFirstCharacter;
                                } else {
                                        if (!NextCharacter(pos, increment))
                                                break;
                                }
                        }
                } else if (dbcsCodePage) {
                        // Case-insensitive DBCS: same scheme as the UTF-8 branch, with characters being
                        // one or two bytes as decided by IsDBCSLeadByte.
                        const size_t maxBytesCharacter = 2;
                        const size_t maxFoldingExpansion = 4;
                        std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
                        const int lenSearch = static_cast<int>(
                                pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind));
                        while (forward ? (pos < endPos) : (pos >= endPos)) {
                                int indexDocument = 0;
                                int indexSearch = 0;
                                bool characterMatches = true;
                                while (characterMatches &&
                                        ((pos + indexDocument) < limitPos) &&
                                        (indexSearch < lenSearch)) {
                                        char bytes[maxBytesCharacter + 1];
                                        bytes[0] = cb.CharAt(pos + indexDocument);
                                        const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
                                        // bytes[1] is only filled in (and only passed to Fold) for two byte characters.
                                        if (widthChar == 2)
                                                bytes[1] = cb.CharAt(pos + indexDocument + 1);
                                        if ((pos + indexDocument + widthChar) > limitPos)
                                                break;
                                        char folded[maxBytesCharacter * maxFoldingExpansion + 1];
                                        const int lenFlat = static_cast<int>(pcf->Fold(folded, sizeof(folded), bytes, widthChar));
                                        folded[lenFlat] = 0;
                                        // Does folded match the buffer
                                        characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
                                        indexDocument += widthChar;
                                        indexSearch += lenFlat;
                                }
                                if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
                                        if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
                                                // Report how many document bytes the match consumed.
                                                *length = indexDocument;
                                                return pos;
                                        }
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                } else {
                        // Case-insensitive single byte text: fold each document byte and compare it with
                        // the folded search text. *length is not modified in this branch.
                        const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
                        std::vector<char> searchThing(lengthFind + 1);
                        pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
                        while (forward ? (pos < endSearch) : (pos >= endSearch)) {
                                bool found = (pos + lengthFind) <= limitPos;
                                for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
                                        char ch = CharAt(pos + indexSearch);
                                        char folded[2];
                                        pcf->Fold(folded, sizeof(folded), &ch, 1);
                                        // Only the first folded byte takes part in the comparison.
                                        found = folded[0] == searchThing[indexSearch];
                                }
                                if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
                                        return pos;
                                }
                                if (!NextCharacter(pos, increment))
                                        break;
                        }
                }
        }
        //Platform::DebugPrintf("Not found\n");
        return -1;
}
1554
1555 const char *Document::SubstituteByPosition(const char *text, int *length) {
1556         if (regex)
1557                 return regex->SubstituteByPosition(this, text, length);
1558         else
1559                 return 0;
1560 }
1561
1562 int Document::LinesTotal() const {
1563         return cb.Lines();
1564 }
1565
1566 void Document::ChangeCase(Range r, bool makeUpperCase) {
1567         for (int pos = r.start; pos < r.end;) {
1568                 int len = LenChar(pos);
1569                 if (len == 1) {
1570                         char ch = CharAt(pos);
1571                         if (makeUpperCase) {
1572                                 if (IsLowerCase(ch)) {
1573                                         ChangeChar(pos, static_cast<char>(MakeUpperCase(ch)));
1574                                 }
1575                         } else {
1576                                 if (IsUpperCase(ch)) {
1577                                         ChangeChar(pos, static_cast<char>(MakeLowerCase(ch)));
1578                                 }
1579                         }
1580                 }
1581                 pos += len;
1582         }
1583 }
1584
1585 void Document::SetDefaultCharClasses(bool includeWordClass) {
1586     charClass.SetDefaultCharClasses(includeWordClass);
1587 }
1588
1589 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1590     charClass.SetCharClasses(chars, newCharClass);
1591 }
1592
1593 void Document::SetStylingBits(int bits) {
1594         stylingBits = bits;
1595         stylingBitsMask = (1 << stylingBits) - 1;
1596 }
1597
1598 void SCI_METHOD Document::StartStyling(int position, char mask) {
1599         stylingMask = mask;
1600         endStyled = position;
1601 }
1602
1603 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1604         if (enteredStyling != 0) {
1605                 return false;
1606         } else {
1607                 enteredStyling++;
1608                 style &= stylingMask;
1609                 int prevEndStyled = endStyled;
1610                 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1611                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1612                                            prevEndStyled, length);
1613                         NotifyModified(mh);
1614                 }
1615                 endStyled += length;
1616                 enteredStyling--;
1617                 return true;
1618         }
1619 }
1620
1621 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1622         if (enteredStyling != 0) {
1623                 return false;
1624         } else {
1625                 enteredStyling++;
1626                 bool didChange = false;
1627                 int startMod = 0;
1628                 int endMod = 0;
1629                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1630                         PLATFORM_ASSERT(endStyled < Length());
1631                         if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1632                                 if (!didChange) {
1633                                         startMod = endStyled;
1634                                 }
1635                                 didChange = true;
1636                                 endMod = endStyled;
1637                         }
1638                 }
1639                 if (didChange) {
1640                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1641                                            startMod, endMod - startMod + 1);
1642                         NotifyModified(mh);
1643                 }
1644                 enteredStyling--;
1645                 return true;
1646         }
1647 }
1648
1649 void Document::EnsureStyledTo(int pos) {
1650         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1651                 IncrementStyleClock();
1652                 if (pli && !pli->UseContainerLexing()) {
1653                         int lineEndStyled = LineFromPosition(GetEndStyled());
1654                         int endStyledTo = LineStart(lineEndStyled);
1655                         pli->Colourise(endStyledTo, pos);
1656                 } else {
1657                         // Ask the watchers to style, and stop as soon as one responds.
1658                         for (int i = 0; pos > GetEndStyled() && i < lenWatchers; i++) {
1659                                 watchers[i].watcher->NotifyStyleNeeded(this, watchers[i].userData, pos);
1660                         }
1661                 }
1662         }
1663 }
1664
1665 void Document::LexerChanged() {
1666         // Tell the watchers the lexer has changed.
1667         for (int i = 0; i < lenWatchers; i++) {
1668                 watchers[i].watcher->NotifyLexerChanged(this, watchers[i].userData);
1669         }
1670 }
1671
1672 int SCI_METHOD Document::SetLineState(int line, int state) {
1673         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1674         if (state != statePrevious) {
1675                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1676                 NotifyModified(mh);
1677         }
1678         return statePrevious;
1679 }
1680
1681 int SCI_METHOD Document::GetLineState(int line) const {
1682         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1683 }
1684
1685 int Document::GetMaxLineState() {
1686         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1687 }
1688
1689 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1690         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1691         NotifyModified(mh);
1692 }
1693
1694 StyledText Document::MarginStyledText(int line) {
1695         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1696         return StyledText(pla->Length(line), pla->Text(line),
1697                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1698 }
1699
1700 void Document::MarginSetText(int line, const char *text) {
1701         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1702         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1703         NotifyModified(mh);
1704 }
1705
1706 void Document::MarginSetStyle(int line, int style) {
1707         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1708 }
1709
1710 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1711         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1712 }
1713
1714 int Document::MarginLength(int line) const {
1715         return static_cast<LineAnnotation *>(perLineData[ldMargin])->Length(line);
1716 }
1717
1718 void Document::MarginClearAll() {
1719         int maxEditorLine = LinesTotal();
1720         for (int l=0; l<maxEditorLine; l++)
1721                 MarginSetText(l, 0);
1722         // Free remaining data
1723         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1724 }
1725
1726 bool Document::AnnotationAny() const {
1727         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->AnySet();
1728 }
1729
1730 StyledText Document::AnnotationStyledText(int line) {
1731         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1732         return StyledText(pla->Length(line), pla->Text(line),
1733                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1734 }
1735
1736 void Document::AnnotationSetText(int line, const char *text) {
1737         const int linesBefore = AnnotationLines(line);
1738         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1739         const int linesAfter = AnnotationLines(line);
1740         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1741         mh.annotationLinesAdded = linesAfter - linesBefore;
1742         NotifyModified(mh);
1743 }
1744
1745 void Document::AnnotationSetStyle(int line, int style) {
1746         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1747         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1748         NotifyModified(mh);
1749 }
1750
1751 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1752         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1753 }
1754
1755 int Document::AnnotationLength(int line) const {
1756         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Length(line);
1757 }
1758
1759 int Document::AnnotationLines(int line) const {
1760         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1761 }
1762
1763 void Document::AnnotationClearAll() {
1764         int maxEditorLine = LinesTotal();
1765         for (int l=0; l<maxEditorLine; l++)
1766                 AnnotationSetText(l, 0);
1767         // Free remaining data
1768         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1769 }
1770
1771 void Document::IncrementStyleClock() {
1772         styleClock = (styleClock + 1) % 0x100000;
1773 }
1774
1775 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1776         if (decorations.FillRange(position, value, fillLength)) {
1777                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1778                                                         position, fillLength);
1779                 NotifyModified(mh);
1780         }
1781 }
1782
1783 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1784         for (int i = 0; i < lenWatchers; i++) {
1785                 if ((watchers[i].watcher == watcher) &&
1786                         (watchers[i].userData == userData))
1787                         return false;
1788         }
1789         WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers + 1];
1790         for (int j = 0; j < lenWatchers; j++)
1791                 pwNew[j] = watchers[j];
1792         pwNew[lenWatchers].watcher = watcher;
1793         pwNew[lenWatchers].userData = userData;
1794         delete []watchers;
1795         watchers = pwNew;
1796         lenWatchers++;
1797         return true;
1798 }
1799
1800 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1801         for (int i = 0; i < lenWatchers; i++) {
1802                 if ((watchers[i].watcher == watcher) &&
1803                         (watchers[i].userData == userData)) {
1804                         if (lenWatchers == 1) {
1805                                 delete []watchers;
1806                                 watchers = 0;
1807                                 lenWatchers = 0;
1808                         } else {
1809                                 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers];
1810                                 for (int j = 0; j < lenWatchers - 1; j++) {
1811                                         pwNew[j] = (j < i) ? watchers[j] : watchers[j + 1];
1812                                 }
1813                                 delete []watchers;
1814                                 watchers = pwNew;
1815                                 lenWatchers--;
1816                         }
1817                         return true;
1818                 }
1819         }
1820         return false;
1821 }
1822
1823 void Document::NotifyModifyAttempt() {
1824         for (int i = 0; i < lenWatchers; i++) {
1825                 watchers[i].watcher->NotifyModifyAttempt(this, watchers[i].userData);
1826         }
1827 }
1828
1829 void Document::NotifySavePoint(bool atSavePoint) {
1830         for (int i = 0; i < lenWatchers; i++) {
1831                 watchers[i].watcher->NotifySavePoint(this, watchers[i].userData, atSavePoint);
1832         }
1833 }
1834
1835 void Document::NotifyModified(DocModification mh) {
1836         if (mh.modificationType & SC_MOD_INSERTTEXT) {
1837                 decorations.InsertSpace(mh.position, mh.length);
1838         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1839                 decorations.DeleteRange(mh.position, mh.length);
1840         }
1841         for (int i = 0; i < lenWatchers; i++) {
1842                 watchers[i].watcher->NotifyModified(this, mh, watchers[i].userData);
1843         }
1844 }
1845
1846 bool Document::IsWordPartSeparator(char ch) {
1847         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1848 }
1849
1850 int Document::WordPartLeft(int pos) {
1851         if (pos > 0) {
1852                 --pos;
1853                 char startChar = cb.CharAt(pos);
1854                 if (IsWordPartSeparator(startChar)) {
1855                         while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1856                                 --pos;
1857                         }
1858                 }
1859                 if (pos > 0) {
1860                         startChar = cb.CharAt(pos);
1861                         --pos;
1862                         if (IsLowerCase(startChar)) {
1863                                 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1864                                         --pos;
1865                                 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1866                                         ++pos;
1867                         } else if (IsUpperCase(startChar)) {
1868                                 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1869                                         --pos;
1870                                 if (!IsUpperCase(cb.CharAt(pos)))
1871                                         ++pos;
1872                         } else if (IsADigit(startChar)) {
1873                                 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1874                                         --pos;
1875                                 if (!IsADigit(cb.CharAt(pos)))
1876                                         ++pos;
1877                         } else if (IsPunctuation(startChar)) {
1878                                 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1879                                         --pos;
1880                                 if (!IsPunctuation(cb.CharAt(pos)))
1881                                         ++pos;
1882                         } else if (isspacechar(startChar)) {
1883                                 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1884                                         --pos;
1885                                 if (!isspacechar(cb.CharAt(pos)))
1886                                         ++pos;
1887                         } else if (!isascii(startChar)) {
1888                                 while (pos > 0 && !isascii(cb.CharAt(pos)))
1889                                         --pos;
1890                                 if (isascii(cb.CharAt(pos)))
1891                                         ++pos;
1892                         } else {
1893                                 ++pos;
1894                         }
1895                 }
1896         }
1897         return pos;
1898 }
1899
1900 int Document::WordPartRight(int pos) {
1901         char startChar = cb.CharAt(pos);
1902         int length = Length();
1903         if (IsWordPartSeparator(startChar)) {
1904                 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1905                         ++pos;
1906                 startChar = cb.CharAt(pos);
1907         }
1908         if (!isascii(startChar)) {
1909                 while (pos < length && !isascii(cb.CharAt(pos)))
1910                         ++pos;
1911         } else if (IsLowerCase(startChar)) {
1912                 while (pos < length && IsLowerCase(cb.CharAt(pos)))
1913                         ++pos;
1914         } else if (IsUpperCase(startChar)) {
1915                 if (IsLowerCase(cb.CharAt(pos + 1))) {
1916                         ++pos;
1917                         while (pos < length && IsLowerCase(cb.CharAt(pos)))
1918                                 ++pos;
1919                 } else {
1920                         while (pos < length && IsUpperCase(cb.CharAt(pos)))
1921                                 ++pos;
1922                 }
1923                 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
1924                         --pos;
1925         } else if (IsADigit(startChar)) {
1926                 while (pos < length && IsADigit(cb.CharAt(pos)))
1927                         ++pos;
1928         } else if (IsPunctuation(startChar)) {
1929                 while (pos < length && IsPunctuation(cb.CharAt(pos)))
1930                         ++pos;
1931         } else if (isspacechar(startChar)) {
1932                 while (pos < length && isspacechar(cb.CharAt(pos)))
1933                         ++pos;
1934         } else {
1935                 ++pos;
1936         }
1937         return pos;
1938 }
1939
/** True when c is a line feed or a carriage return. */
bool IsLineEndChar(char c) {
        switch (c) {
        case '\n':
        case '\r':
                return true;
        default:
                return false;
        }
}
1943
1944 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
1945         int sStart = cb.StyleAt(pos);
1946         if (delta < 0) {
1947                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1948                         pos--;
1949                 pos++;
1950         } else {
1951                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1952                         pos++;
1953         }
1954         return pos;
1955 }
1956
/**
 * Return the partner of a brace character: ( ) [ ] { } < > map to their opposite,
 * every other character yields '\0'.
 */
static char BraceOpposite(char ch) {
        // Each pair is stored as opener followed by closer.
        static const char bracePairs[] = "()[]{}<>";
        for (int i = 0; bracePairs[i] != '\0'; i += 2) {
                if (ch == bracePairs[i])
                        return bracePairs[i + 1];
                if (ch == bracePairs[i + 1])
                        return bracePairs[i];
        }
        return '\0';
}
1979
1980 // TODO: should be able to extend styled region to find matching brace
1981 int Document::BraceMatch(int position, int /*maxReStyle*/) {
1982         char chBrace = CharAt(position);
1983         char chSeek = BraceOpposite(chBrace);
1984         if (chSeek == '\0')
1985                 return - 1;
1986         char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
1987         int direction = -1;
1988         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
1989                 direction = 1;
1990         int depth = 1;
1991         position = NextPosition(position, direction);
1992         while ((position >= 0) && (position < Length())) {
1993                 char chAtPos = CharAt(position);
1994                 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
1995                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
1996                         if (chAtPos == chBrace)
1997                                 depth++;
1998                         if (chAtPos == chSeek)
1999                                 depth--;
2000                         if (depth == 0)
2001                                 return position;
2002                 }
2003                 int positionBeforeMove = position;
2004                 position = NextPosition(position, direction);
2005                 if (position == positionBeforeMove)
2006                         break;
2007         }
2008         return - 1;
2009 }
2010
2011 /**
2012  * Implementation of RegexSearchBase for the default built-in regular expression engine
2013  */
2014 class BuiltinRegex : public RegexSearchBase {
2015 public:
2016         BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {}
2017
2018         virtual ~BuiltinRegex() {
2019                 delete substituted;
2020         }
2021
2022         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2023                         bool caseSensitive, bool word, bool wordStart, int flags,
2024                         int *length);
2025
2026         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2027
2028 private:
2029         RESearch search;
2030         char *substituted;
2031 };
2032
2033 // Define a way for the Regular Expression code to access the document
2034 class DocumentIndexer : public CharacterIndexer {
2035         Document *pdoc;
2036         int end;
2037 public:
2038         DocumentIndexer(Document *pdoc_, int end_) :
2039                 pdoc(pdoc_), end(end_) {
2040         }
2041
2042         virtual ~DocumentIndexer() {
2043         }
2044
2045         virtual char CharAt(int index) {
2046                 if (index < 0 || index >= end)
2047                         return 0;
2048                 else
2049                         return pdoc->CharAt(index);
2050         }
2051 };
2052
2053 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2054                         bool caseSensitive, bool, bool, int flags,
2055                         int *length) {
2056         bool posix = (flags & SCFIND_POSIX) != 0;
2057         int increment = (minPos <= maxPos) ? 1 : -1;
2058
2059         int startPos = minPos;
2060         int endPos = maxPos;
2061
2062         // Range endpoints should not be inside DBCS characters, but just in case, move them.
2063         startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2064         endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2065
2066         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2067         if (errmsg) {
2068                 return -1;
2069         }
2070         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2071         // Replace first '.' with '-' in each property file variable reference:
2072         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2073         //     Replace: $(\1-\2)
2074         int lineRangeStart = doc->LineFromPosition(startPos);
2075         int lineRangeEnd = doc->LineFromPosition(endPos);
2076         if ((increment == 1) &&
2077                 (startPos >= doc->LineEnd(lineRangeStart)) &&
2078                 (lineRangeStart < lineRangeEnd)) {
2079                 // the start position is at end of line or between line end characters.
2080                 lineRangeStart++;
2081                 startPos = doc->LineStart(lineRangeStart);
2082         } else if ((increment == -1) &&
2083                    (startPos <= doc->LineStart(lineRangeStart)) &&
2084                    (lineRangeStart > lineRangeEnd)) {
2085                 // the start position is at beginning of line.
2086                 lineRangeStart--;
2087                 startPos = doc->LineEnd(lineRangeStart);
2088         }
2089         int pos = -1;
2090         int lenRet = 0;
2091         char searchEnd = s[*length - 1];
2092         char searchEndPrev = (*length > 1) ? s[*length - 2] : '\0';
2093         int lineRangeBreak = lineRangeEnd + increment;
2094         for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2095                 int startOfLine = doc->LineStart(line);
2096                 int endOfLine = doc->LineEnd(line);
2097                 if (increment == 1) {
2098                         if (line == lineRangeStart) {
2099                                 if ((startPos != startOfLine) && (s[0] == '^'))
2100                                         continue;       // Can't match start of line if start position after start of line
2101                                 startOfLine = startPos;
2102                         }
2103                         if (line == lineRangeEnd) {
2104                                 if ((endPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2105                                         continue;       // Can't match end of line if end position before end of line
2106                                 endOfLine = endPos;
2107                         }
2108                 } else {
2109                         if (line == lineRangeEnd) {
2110                                 if ((endPos != startOfLine) && (s[0] == '^'))
2111                                         continue;       // Can't match start of line if end position after start of line
2112                                 startOfLine = endPos;
2113                         }
2114                         if (line == lineRangeStart) {
2115                                 if ((startPos != endOfLine) && (searchEnd == '$') && (searchEndPrev != '\\'))
2116                                         continue;       // Can't match end of line if start position before end of line
2117                                 endOfLine = startPos;
2118                         }
2119                 }
2120
2121                 DocumentIndexer di(doc, endOfLine);
2122                 int success = search.Execute(di, startOfLine, endOfLine);
2123                 if (success) {
2124                         pos = search.bopat[0];
2125                         lenRet = search.eopat[0] - search.bopat[0];
2126                         // There can be only one start of a line, so no need to look for last match in line
2127                         if ((increment == -1) && (s[0] != '^')) {
2128                                 // Check for the last match on this line.
2129                                 int repetitions = 1000; // Break out of infinite loop
2130                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2131                                         success = search.Execute(di, pos+1, endOfLine);
2132                                         if (success) {
2133                                                 if (search.eopat[0] <= minPos) {
2134                                                         pos = search.bopat[0];
2135                                                         lenRet = search.eopat[0] - search.bopat[0];
2136                                                 } else {
2137                                                         success = 0;
2138                                                 }
2139                                         }
2140                                 }
2141                         }
2142                         break;
2143                 }
2144         }
2145         *length = lenRet;
2146         return pos;
2147 }
2148
2149 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2150         delete []substituted;
2151         substituted = 0;
2152         DocumentIndexer di(doc, doc->Length());
2153         if (!search.GrabMatches(di))
2154                 return 0;
2155         unsigned int lenResult = 0;
2156         for (int i = 0; i < *length; i++) {
2157                 if (text[i] == '\\') {
2158                         if (text[i + 1] >= '1' && text[i + 1] <= '9') {
2159                                 unsigned int patNum = text[i + 1] - '0';
2160                                 lenResult += search.eopat[patNum] - search.bopat[patNum];
2161                                 i++;
2162                         } else {
2163                                 switch (text[i + 1]) {
2164                                 case 'a':
2165                                 case 'b':
2166                                 case 'f':
2167                                 case 'n':
2168                                 case 'r':
2169                                 case 't':
2170                                 case 'v':
2171                                 case '\\':
2172                                         i++;
2173                                 }
2174                                 lenResult++;
2175                         }
2176                 } else {
2177                         lenResult++;
2178                 }
2179         }
2180         substituted = new char[lenResult + 1];
2181         char *o = substituted;
2182         for (int j = 0; j < *length; j++) {
2183                 if (text[j] == '\\') {
2184                         if (text[j + 1] >= '1' && text[j + 1] <= '9') {
2185                                 unsigned int patNum = text[j + 1] - '0';
2186                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2187                                 if (search.pat[patNum]) // Will be null if try for a match that did not occur
2188                                         memcpy(o, search.pat[patNum], len);
2189                                 o += len;
2190                                 j++;
2191                         } else {
2192                                 j++;
2193                                 switch (text[j]) {
2194                                 case 'a':
2195                                         *o++ = '\a';
2196                                         break;
2197                                 case 'b':
2198                                         *o++ = '\b';
2199                                         break;
2200                                 case 'f':
2201                                         *o++ = '\f';
2202                                         break;
2203                                 case 'n':
2204                                         *o++ = '\n';
2205                                         break;
2206                                 case 'r':
2207                                         *o++ = '\r';
2208                                         break;
2209                                 case 't':
2210                                         *o++ = '\t';
2211                                         break;
2212                                 case 'v':
2213                                         *o++ = '\v';
2214                                         break;
2215                                 case '\\':
2216                                         *o++ = '\\';
2217                                         break;
2218                                 default:
2219                                         *o++ = '\\';
2220                                         j--;
2221                                 }
2222                         }
2223                 } else {
2224                         *o++ = text[j];
2225                 }
2226         }
2227         *o = '\0';
2228         *length = lenResult;
2229         return substituted;
2230 }
2231
2232 #ifndef SCI_OWNREGEX
2233
2234 #ifdef SCI_NAMESPACE
2235
2236 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2237         return new BuiltinRegex(charClassTable);
2238 }
2239
2240 #else
2241
2242 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2243         return new BuiltinRegex(charClassTable);
2244 }
2245
2246 #endif
2247
2248 #endif