plugins/scintilla/scintilla/Document.cxx

   1 // Scintilla source code edit control
   2 /** @file Document.cxx
   3  ** Text document that handles notifications, DBCS, styling, words and end of line.
   4  **/
   5 // Copyright 1998-2011 by Neil Hodgson <neilh@scintilla.org>
   6 // The License.txt file describes the conditions under which this software may be distributed.
   7
   8 #include <stdlib.h>
   9 #include <string.h>
  10 #include <stdio.h>
  11 #include <ctype.h>
  12 #include <assert.h>
  13
  14 #include <string>
  15 #include <vector>
  16
  17 #include "Platform.h"
  18
  19 #include "ILexer.h"
  20 #include "Scintilla.h"
  21
  22 #include "SplitVector.h"
  23 #include "Partitioning.h"
  24 #include "RunStyles.h"
  25 #include "CellBuffer.h"
  26 #include "PerLine.h"
  27 #include "CharClassify.h"
  28 #include "CharacterSet.h"
  29 #include "Decoration.h"
  30 #include "Document.h"
  31 #include "RESearch.h"
  32 #include "UniConversion.h"
  33
  34 #ifdef SCI_NAMESPACE
  35 using namespace Scintilla;
  36 #endif
  37
  38 // This is ASCII specific but is safe with chars >= 0x80
  39 static inline bool isspacechar(unsigned char ch) {
  40         return (ch == ' ') || ((ch >= 0x09) && (ch <= 0x0d));
  41 }
  42
  43 static inline bool IsPunctuation(char ch) {
  44         return isascii(ch) && ispunct(ch);
  45 }
  46
  47 static inline bool IsADigit(char ch) {
  48         return isascii(ch) && isdigit(ch);
  49 }
  50
  51 static inline bool IsLowerCase(char ch) {
  52         return isascii(ch) && islower(ch);
  53 }
  54
  55 static inline bool IsUpperCase(char ch) {
  56         return isascii(ch) && isupper(ch);
  57 }
  58
  59 void LexInterface::Colourise(int start, int end) {
  60         ElapsedTime et;
  61         if (pdoc && instance && !performingStyle) {
  62                 // Protect against reentrance, which may occur, for example, when
  63                 // fold points are discovered while performing styling and the folding
  64                 // code looks for child lines which may trigger styling.
  65                 performingStyle = true;
  66
  67                 int lengthDoc = pdoc->Length();
  68                 if (end == -1)
  69                         end = lengthDoc;
  70                 int len = end - start;
  71
  72                 PLATFORM_ASSERT(len >= 0);
  73                 PLATFORM_ASSERT(start + len <= lengthDoc);
  74
  75                 int styleStart = 0;
  76                 if (start > 0)
  77                         styleStart = pdoc->StyleAt(start - 1) & pdoc->stylingBitsMask;
  78
  79                 if (len > 0) {
  80                         instance->Lex(start, len, styleStart, pdoc);
  81                         instance->Fold(start, len, styleStart, pdoc);
  82                 }
  83
  84                 performingStyle = false;
  85         }
  86 }
  87
  88 Document::Document() {
  89         refCount = 0;
  90 #ifdef __unix__
  91         eolMode = SC_EOL_LF;
  92 #else
  93         eolMode = SC_EOL_CRLF;
  94 #endif
  95         dbcsCodePage = 0;
  96         stylingBits = 5;
  97         stylingBitsMask = 0x1F;
  98         stylingMask = 0;
  99         endStyled = 0;
 100         styleClock = 0;
 101         enteredModification = 0;
 102         enteredStyling = 0;
 103         enteredReadOnlyCount = 0;
 104         tabInChars = 8;
 105         indentInChars = 0;
 106         actualIndentInChars = 8;
 107         useTabs = true;
 108         tabIndents = true;
 109         backspaceUnindents = false;
 110         watchers = 0;
 111         lenWatchers = 0;
 112
 113         matchesValid = false;
 114         regex = 0;
 115
 116         perLineData[ldMarkers] = new LineMarkers();
 117         perLineData[ldLevels] = new LineLevels();
 118         perLineData[ldState] = new LineState();
 119         perLineData[ldMargin] = new LineAnnotation();
 120         perLineData[ldAnnotation] = new LineAnnotation();
 121
 122         cb.SetPerLine(this);
 123
 124         pli = 0;
 125 }
 126
 127 Document::~Document() {
 128         for (int i = 0; i < lenWatchers; i++) {
 129                 watchers[i].watcher->NotifyDeleted(this, watchers[i].userData);
 130         }
 131         delete []watchers;
 132         for (int j=0; j<ldSize; j++) {
 133                 delete perLineData[j];
 134                 perLineData[j] = 0;
 135         }
 136         watchers = 0;
 137         lenWatchers = 0;
 138         delete regex;
 139         regex = 0;
 140         delete pli;
 141         pli = 0;
 142 }
 143
 144 void Document::Init() {
 145         for (int j=0; j<ldSize; j++) {
 146                 if (perLineData[j])
 147                         perLineData[j]->Init();
 148         }
 149 }
 150
 151 void Document::InsertLine(int line) {
 152         for (int j=0; j<ldSize; j++) {
 153                 if (perLineData[j])
 154                         perLineData[j]->InsertLine(line);
 155         }
 156 }
 157
 158 void Document::RemoveLine(int line) {
 159         for (int j=0; j<ldSize; j++) {
 160                 if (perLineData[j])
 161                         perLineData[j]->RemoveLine(line);
 162         }
 163 }
 164
 165 // Increase reference count and return its previous value.
 166 int Document::AddRef() {
 167         return refCount++;
 168 }
 169
 170 // Decrease reference count and return its previous value.
 171 // Delete the document if reference count reaches zero.
 172 int Document::Release() {
 173         int curRefCount = --refCount;
 174         if (curRefCount == 0)
 175                 delete this;
 176         return curRefCount;
 177 }
 178
 179 void Document::SetSavePoint() {
 180         cb.SetSavePoint();
 181         NotifySavePoint(true);
 182 }
 183
 184 int Document::GetMark(int line) {
 185         return static_cast<LineMarkers *>(perLineData[ldMarkers])->MarkValue(line);
 186 }
 187
 188 int Document::AddMark(int line, int markerNum) {
 189         if (line >= 0 && line <= LinesTotal()) {
 190                 int prev = static_cast<LineMarkers *>(perLineData[ldMarkers])->
 191                         AddMark(line, markerNum, LinesTotal());
 192                 DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 193                 NotifyModified(mh);
 194                 return prev;
 195         } else {
 196                 return 0;
 197         }
 198 }
 199
 200 void Document::AddMarkSet(int line, int valueSet) {
 201         if (line < 0 || line > LinesTotal()) {
 202                 return;
 203         }
 204         unsigned int m = valueSet;
 205         for (int i = 0; m; i++, m >>= 1)
 206                 if (m & 1)
 207                         static_cast<LineMarkers *>(perLineData[ldMarkers])->
 208                                 AddMark(line, i, LinesTotal());
 209         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 210         NotifyModified(mh);
 211 }
 212
 213 void Document::DeleteMark(int line, int markerNum) {
 214         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, false);
 215         DocModification mh(SC_MOD_CHANGEMARKER, LineStart(line), 0, 0, 0, line);
 216         NotifyModified(mh);
 217 }
 218
 219 void Document::DeleteMarkFromHandle(int markerHandle) {
 220         static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMarkFromHandle(markerHandle);
 221         DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 222         mh.line = -1;
 223         NotifyModified(mh);
 224 }
 225
 226 void Document::DeleteAllMarks(int markerNum) {
 227         bool someChanges = false;
 228         for (int line = 0; line < LinesTotal(); line++) {
 229                 if (static_cast<LineMarkers *>(perLineData[ldMarkers])->DeleteMark(line, markerNum, true))
 230                         someChanges = true;
 231         }
 232         if (someChanges) {
 233                 DocModification mh(SC_MOD_CHANGEMARKER, 0, 0, 0, 0);
 234                 mh.line = -1;
 235                 NotifyModified(mh);
 236         }
 237 }
 238
 239 int Document::LineFromHandle(int markerHandle) {
 240         return static_cast<LineMarkers *>(perLineData[ldMarkers])->LineFromHandle(markerHandle);
 241 }
 242
 243 int SCI_METHOD Document::LineStart(int line) const {
 244         return cb.LineStart(line);
 245 }
 246
 247 int Document::LineEnd(int line) const {
 248         if (line == LinesTotal() - 1) {
 249                 return LineStart(line + 1);
 250         } else {
 251                 int position = LineStart(line + 1) - 1;
 252                 // When line terminator is CR+LF, may need to go back one more
 253                 if ((position > LineStart(line)) && (cb.CharAt(position - 1) == '\r')) {
 254                         position--;
 255                 }
 256                 return position;
 257         }
 258 }
 259
 260 void SCI_METHOD Document::SetErrorStatus(int status) {
 261         // Tell the watchers the lexer has changed.
 262         for (int i = 0; i < lenWatchers; i++) {
 263                 watchers[i].watcher->NotifyErrorOccurred(this, watchers[i].userData, status);
 264         }
 265 }
 266
 267 int SCI_METHOD Document::LineFromPosition(int pos) const {
 268         return cb.LineFromPosition(pos);
 269 }
 270
 271 int Document::LineEndPosition(int position) const {
 272         return LineEnd(LineFromPosition(position));
 273 }
 274
 275 bool Document::IsLineEndPosition(int position) const {
 276         return LineEnd(LineFromPosition(position)) == position;
 277 }
 278
 279 int Document::VCHomePosition(int position) const {
 280         int line = LineFromPosition(position);
 281         int startPosition = LineStart(line);
 282         int endLine = LineEnd(line);
 283         int startText = startPosition;
 284         while (startText < endLine && (cb.CharAt(startText) == ' ' || cb.CharAt(startText) == '\t'))
 285                 startText++;
 286         if (position == startText)
 287                 return startPosition;
 288         else
 289                 return startText;
 290 }
 291
 292 int SCI_METHOD Document::SetLevel(int line, int level) {
 293         int prev = static_cast<LineLevels *>(perLineData[ldLevels])->SetLevel(line, level, LinesTotal());
 294         if (prev != level) {
 295                 DocModification mh(SC_MOD_CHANGEFOLD | SC_MOD_CHANGEMARKER,
 296                                    LineStart(line), 0, 0, 0, line);
 297                 mh.foldLevelNow = level;
 298                 mh.foldLevelPrev = prev;
 299                 NotifyModified(mh);
 300         }
 301         return prev;
 302 }
 303
 304 int SCI_METHOD Document::GetLevel(int line) const {
 305         return static_cast<LineLevels *>(perLineData[ldLevels])->GetLevel(line);
 306 }
 307
 308 void Document::ClearLevels() {
 309         static_cast<LineLevels *>(perLineData[ldLevels])->ClearLevels();
 310 }
 311
 312 static bool IsSubordinate(int levelStart, int levelTry) {
 313         if (levelTry & SC_FOLDLEVELWHITEFLAG)
 314                 return true;
 315         else
 316                 return (levelStart & SC_FOLDLEVELNUMBERMASK) < (levelTry & SC_FOLDLEVELNUMBERMASK);
 317 }
 318
 319 int Document::GetLastChild(int lineParent, int level) {
 320         if (level == -1)
 321                 level = GetLevel(lineParent) & SC_FOLDLEVELNUMBERMASK;
 322         int maxLine = LinesTotal();
 323         int lineMaxSubord = lineParent;
 324         while (lineMaxSubord < maxLine - 1) {
 325                 EnsureStyledTo(LineStart(lineMaxSubord + 2));
 326                 if (!IsSubordinate(level, GetLevel(lineMaxSubord + 1)))
 327                         break;
 328                 lineMaxSubord++;
 329         }
 330         if (lineMaxSubord > lineParent) {
 331                 if (level > (GetLevel(lineMaxSubord + 1) & SC_FOLDLEVELNUMBERMASK)) {
 332                         // Have chewed up some whitespace that belongs to a parent so seek back
 333                         if (GetLevel(lineMaxSubord) & SC_FOLDLEVELWHITEFLAG) {
 334                                 lineMaxSubord--;
 335                         }
 336                 }
 337         }
 338         return lineMaxSubord;
 339 }
 340
 341 int Document::GetFoldParent(int line) {
 342         int level = GetLevel(line) & SC_FOLDLEVELNUMBERMASK;
 343         int lineLook = line - 1;
 344         while ((lineLook > 0) && (
 345                     (!(GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG)) ||
 346                     ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) >= level))
 347               ) {
 348                 lineLook--;
 349         }
 350         if ((GetLevel(lineLook) & SC_FOLDLEVELHEADERFLAG) &&
 351                 ((GetLevel(lineLook) & SC_FOLDLEVELNUMBERMASK) < level)) {
 352                 return lineLook;
 353         } else {
 354                 return -1;
 355         }
 356 }
 357
  358 void Document::GetHighlightDelimiters(HighlightDelimiter &highlightDelimiter, int line, int topLine, int bottomLine) {
              // Computes four line numbers from the fold levels around 'line' and stores
              // them in highlightDelimiter: beginFoldBlock, endFoldBlock,
              // beginMarginCorrectlyDrawnZone and endMarginCorrectlyDrawnZone.
              // The scans are bounded by a window running from Minimum(line, topLine) - 1
              // to Maximum(line, bottomLine) + 1; the backward scan additionally keeps
              // going while the last level read carries SC_FOLDLEVELWHITEFLAG.
              // NOTE(review): topLine/bottomLine look like the visible line range - confirm with callers.
  359         int noNeedToParseBefore = Platform::Minimum(line, topLine) - 1;
  360         int noNeedToParseAfter = Platform::Maximum(line, bottomLine) + 1;
              // Line holding the end of the document; only used by the final fix-up below.
  361         int endLine = LineFromPosition(Length());
              // Results start at the values reported when a search finds nothing:
              // the window edges for the block start and both zone bounds, -1 for the block end.
  362         int beginFoldBlock = noNeedToParseBefore;
  363         int endFoldBlock = -1;
  364         int beginMarginCorrectlyDrawnZone = noNeedToParseBefore;
  365         int endMarginCorrectlyDrawnZone = noNeedToParseAfter;
  366         int endOfTailOfWhiteFlag = -1; //endOfTailOfWhiteFlag points the last SC_FOLDLEVELWHITEFLAG if follow a fold block. Otherwise endOfTailOfWhiteFlag points end of fold block.
  367         int level = GetLevel(line);
              // levelNumber is the reference level number for both scans; it stays -1
              // until the backward scan meets a line without SC_FOLDLEVELWHITEFLAG.
  368         int levelNumber = -1;
  369         int lineLookLevel = 0;
  370         int lineLookLevelNumber = -1;
  371         int lineLook = line;
  372         bool beginFoldBlockFound = false;
  373         bool endFoldBlockFound = false;
  374         bool beginMarginCorrectlyDrawnZoneFound = false;
  375         bool endMarginCorrectlyDrawnZoneFound = false;
  376
  377         /*******************************************************************************/
  378         /*      search backward (beginFoldBlock & beginMarginCorrectlyDrawnZone)       */
  379         /*******************************************************************************/
              // Walks upwards from 'line' until both begin values are found, or until the
              // window edge is passed and the last level read is not a white line.
  380         for (endOfTailOfWhiteFlag = line; (lineLook > noNeedToParseBefore || (lineLookLevel & SC_FOLDLEVELWHITEFLAG)) && (!beginFoldBlockFound || !beginMarginCorrectlyDrawnZoneFound); --lineLook) {
  381                 lineLookLevel = GetLevel(lineLook);
  382                 if (levelNumber != -1) {
                              // Reference level already known: compare this line against it.
  383                         lineLookLevelNumber = lineLookLevel & SC_FOLDLEVELNUMBERMASK;
  384                         if (!beginMarginCorrectlyDrawnZoneFound && (lineLookLevelNumber > levelNumber)) {
                                      // A deeper line above: the zone starts at the remembered tail position.
  385                                 beginMarginCorrectlyDrawnZoneFound = true;
  386                                 beginMarginCorrectlyDrawnZone = endOfTailOfWhiteFlag;
  387                         }
  388                         //find the last space line (SC_FOLDLEVELWHITEFLAG).
  389                         if (!beginMarginCorrectlyDrawnZoneFound && !(lineLookLevel & SC_FOLDLEVELWHITEFLAG)) {
  390                                 endOfTailOfWhiteFlag = lineLook - 1;
  391                         }
  392                         if (!beginFoldBlockFound && (lineLookLevelNumber < levelNumber)) {
                                      // A shallower line above: it is taken as the start of the fold block.
  393                                 beginFoldBlockFound = true;
  394                                 beginFoldBlock = lineLook;
  395                                 if (!beginMarginCorrectlyDrawnZoneFound) {
  396                                         beginMarginCorrectlyDrawnZoneFound = true;
  397                                         beginMarginCorrectlyDrawnZone = lineLook - 1;
  398                                 }
  399                         } else  if (!beginFoldBlockFound && lineLookLevelNumber == SC_FOLDLEVELBASE) {
                                      // Reached the base level number without a shallower line: no block start.
  400                                 beginFoldBlockFound = true;
  401                                 beginFoldBlock = -1;
  402                         }
  403                 } else if (!(lineLookLevel & SC_FOLDLEVELWHITEFLAG)) {
                              // First non-white line met: it fixes the reference level number.
  404                         endOfTailOfWhiteFlag = lineLook - 1;
  405                         levelNumber = lineLookLevel & SC_FOLDLEVELNUMBERMASK;
  406                         if (lineLookLevel & SC_FOLDLEVELHEADERFLAG &&
  407                                 //Managed the folding block when a fold header does not have any subordinate lines to fold away.
  408                                 (levelNumber < (GetLevel(lineLook + 1) & SC_FOLDLEVELNUMBERMASK))) {
                                      // The line is a header with a deeper following line: the block starts
                                      // here and the reference level becomes that of the following line.
  409                                 beginFoldBlockFound = true;
  410                                 beginFoldBlock = lineLook;
  411                                 beginMarginCorrectlyDrawnZoneFound = true;
  412                                 beginMarginCorrectlyDrawnZone = endOfTailOfWhiteFlag;
  413                                 levelNumber = GetLevel(lineLook + 1) & SC_FOLDLEVELNUMBERMASK;;
  414                         }
  415                 }
  416         }
  417
  418         /****************************************************************************/
  419         /*       search forward (endStartBlock & endMarginCorrectlyDrawnZone)       */
  420         /****************************************************************************/
  421         if (level & SC_FOLDLEVELHEADERFLAG) {
  422                 //ignore this line because this line is on first one of block.
  423                 lineLook = line + 1;
  424         } else {
  425                 lineLook = line;
  426         }
              // Walks downwards, stopping before noNeedToParseAfter or once both end values are found.
  427         for (; lineLook < noNeedToParseAfter && (!endFoldBlockFound || !endMarginCorrectlyDrawnZoneFound); ++lineLook) {
  428                 lineLookLevel = GetLevel(lineLook);
  429                 lineLookLevelNumber = lineLookLevel & SC_FOLDLEVELNUMBERMASK;
  430                 if (!endFoldBlockFound && !(lineLookLevel & SC_FOLDLEVELWHITEFLAG) && lineLookLevelNumber < levelNumber) {
                              // First non-white shallower line: the block ended on the line before it.
  431                         endFoldBlockFound = true;
  432                         endFoldBlock = lineLook - 1;
  433                         if (!endMarginCorrectlyDrawnZoneFound) {
  434                                 endMarginCorrectlyDrawnZoneFound = true;
  435                                 endMarginCorrectlyDrawnZone = lineLook;
  436                         }
  437                 } else if (!endFoldBlockFound && lineLookLevel == SC_FOLDLEVELBASE) {
                              // Note: this compares the whole level value (flags included) with
                              // SC_FOLDLEVELBASE, unlike the backward scan which compares the level number.
  438                         endFoldBlockFound = true;
  439                         endFoldBlock = -1;
  440                 }
  441                 if (!endMarginCorrectlyDrawnZoneFound && (lineLookLevel & SC_FOLDLEVELHEADERFLAG) &&
  442                         //Managed the folding block when a fold header does not have any subordinate lines to fold away.
  443                         (levelNumber < (GetLevel(lineLook + 1) & SC_FOLDLEVELNUMBERMASK))) {
  444                         endMarginCorrectlyDrawnZoneFound = true;
  445                         endMarginCorrectlyDrawnZone = lineLook;
  446                 }
  447         }
              // No block end was found inside the window: fall back to the last line scanned
              // when the scan ran past the end of the document on a shallower line, or when
              // the reference level number is above SC_FOLDLEVELBASE.
  448         if (!endFoldBlockFound && ((lineLook > endLine && lineLookLevelNumber < levelNumber) ||
  449                 (levelNumber > SC_FOLDLEVELBASE))) {
  450                 //manage when endfold is incorrect or on last line.
  451                 endFoldBlock = lineLook - 1;
  452                 //useless to set endMarginCorrectlyDrawnZone.
  453                 //if endMarginCorrectlyDrawnZoneFound equals false then endMarginCorrectlyDrawnZone already equals to endLine + 1.
  454         }
  455
              // Publish the four results to the caller's structure.
  456         highlightDelimiter.beginFoldBlock = beginFoldBlock;
  457         highlightDelimiter.endFoldBlock = endFoldBlock;
  458         highlightDelimiter.beginMarginCorrectlyDrawnZone = beginMarginCorrectlyDrawnZone;
  459         highlightDelimiter.endMarginCorrectlyDrawnZone = endMarginCorrectlyDrawnZone;
  460 }
 461
 462 int Document::ClampPositionIntoDocument(int pos) {
 463         return Platform::Clamp(pos, 0, Length());
 464 }
 465
 466 bool Document::IsCrLf(int pos) {
 467         if (pos < 0)
 468                 return false;
 469         if (pos >= (Length() - 1))
 470                 return false;
 471         return (cb.CharAt(pos) == '\r') && (cb.CharAt(pos + 1) == '\n');
 472 }
 473
 474 int Document::LenChar(int pos) {
 475         if (pos < 0) {
 476                 return 1;
 477         } else if (IsCrLf(pos)) {
 478                 return 2;
 479         } else if (SC_CP_UTF8 == dbcsCodePage) {
 480                 unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 481                 if (ch < 0x80)
 482                         return 1;
 483                 int len = 2;
 484                 if (ch >= (0x80 + 0x40 + 0x20 + 0x10))
 485                         len = 4;
 486                 else if (ch >= (0x80 + 0x40 + 0x20))
 487                         len = 3;
 488                 int lengthDoc = Length();
 489                 if ((pos + len) > lengthDoc)
 490                         return lengthDoc -pos;
 491                 else
 492                         return len;
 493         } else if (dbcsCodePage) {
 494                 return IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 495         } else {
 496                 return 1;
 497         }
 498 }
 499
 500 static bool IsTrailByte(int ch) {
 501         return (ch >= 0x80) && (ch < (0x80 + 0x40));
 502 }
 503
 504 static int BytesFromLead(int leadByte) {
 505         if (leadByte > 0xF4) {
 506                 // Characters longer than 4 bytes not possible in current UTF-8
 507                 return 0;
 508         } else if (leadByte >= 0xF0) {
 509                 return 4;
 510         } else if (leadByte >= 0xE0) {
 511                 return 3;
 512         } else if (leadByte >= 0xC2) {
 513                 return 2;
 514         }
 515         return 0;
 516 }
 517
 518 bool Document::InGoodUTF8(int pos, int &start, int &end) const {
 519         int lead = pos;
 520         while ((lead>0) && (pos-lead < 4) && IsTrailByte(static_cast<unsigned char>(cb.CharAt(lead-1))))
 521                 lead--;
 522         start = 0;
 523         if (lead > 0) {
 524                 start = lead-1;
 525         }
 526         int leadByte = static_cast<unsigned char>(cb.CharAt(start));
 527         int bytes = BytesFromLead(leadByte);
 528         if (bytes == 0) {
 529                 return false;
 530         } else {
 531                 int trailBytes = bytes - 1;
 532                 int len = pos - lead + 1;
 533                 if (len > trailBytes)
 534                         // pos too far from lead
 535                         return false;
 536                 // Check that there are enough trails for this lead
 537                 int trail = pos + 1;
 538                 while ((trail-lead<trailBytes) && (trail < Length())) {
 539                         if (!IsTrailByte(static_cast<unsigned char>(cb.CharAt(trail)))) {
 540                                 return false;
 541                         }
 542                         trail++;
 543                 }
 544                 end = start + bytes;
 545                 return true;
 546         }
 547 }
 548
 549 // Normalise a position so that it is not halfway through a two byte character.
 550 // This can occur in two situations -
 551 // When lines are terminated with \r\n pairs which should be treated as one character.
 552 // When displaying DBCS text such as Japanese.
 553 // If moving, move the position in the indicated direction.
 554 int Document::MovePositionOutsideChar(int pos, int moveDir, bool checkLineEnd) {
 555         //Platform::DebugPrintf("NoCRLF %d %d\n", pos, moveDir);
 556         // If out of range, just return minimum/maximum value.
 557         if (pos <= 0)
 558                 return 0;
 559         if (pos >= Length())
 560                 return Length();
 561
 562         // PLATFORM_ASSERT(pos > 0 && pos < Length());
 563         if (checkLineEnd && IsCrLf(pos - 1)) {
 564                 if (moveDir > 0)
 565                         return pos + 1;
 566                 else
 567                         return pos - 1;
 568         }
 569
 570         if (dbcsCodePage) {
 571                 if (SC_CP_UTF8 == dbcsCodePage) {
 572                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 573                         int startUTF = pos;
 574                         int endUTF = pos;
 575                         if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
 576                                 // ch is a trail byte within a UTF-8 character
 577                                 if (moveDir > 0)
 578                                         pos = endUTF;
 579                                 else
 580                                         pos = startUTF;
 581                         }
 582                 } else {
 583                         // Anchor DBCS calculations at start of line because start of line can
 584                         // not be a DBCS trail byte.
 585                         int posStartLine = LineStart(LineFromPosition(pos));
 586                         if (pos == posStartLine)
 587                                 return pos;
 588
 589                         // Step back until a non-lead-byte is found.
 590                         int posCheck = pos;
 591                         while ((posCheck > posStartLine) && IsDBCSLeadByte(cb.CharAt(posCheck-1)))
 592                                 posCheck--;
 593
 594                         // Check from known start of character.
 595                         while (posCheck < pos) {
 596                                 int mbsize = IsDBCSLeadByte(cb.CharAt(posCheck)) ? 2 : 1;
 597                                 if (posCheck + mbsize == pos) {
 598                                         return pos;
 599                                 } else if (posCheck + mbsize > pos) {
 600                                         if (moveDir > 0) {
 601                                                 return posCheck + mbsize;
 602                                         } else {
 603                                                 return posCheck;
 604                                         }
 605                                 }
 606                                 posCheck += mbsize;
 607                         }
 608                 }
 609         }
 610
 611         return pos;
 612 }
 613
 614 // NextPosition moves between valid positions - it can not handle a position in the middle of a
 615 // multi-byte character. It is used to iterate through text more efficiently than MovePositionOutsideChar.
 616 // A \r\n pair is treated as two characters.
 617 int Document::NextPosition(int pos, int moveDir) const {
 618         // If out of range, just return minimum/maximum value.
 619         int increment = (moveDir > 0) ? 1 : -1;
 620         if (pos + increment <= 0)
 621                 return 0;
 622         if (pos + increment >= Length())
 623                 return Length();
 624
 625         if (dbcsCodePage) {
 626                 if (SC_CP_UTF8 == dbcsCodePage) {
 627                         pos += increment;
 628                         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
 629                         int startUTF = pos;
 630                         int endUTF = pos;
 631                         if (IsTrailByte(ch) && InGoodUTF8(pos, startUTF, endUTF)) {
 632                                 // ch is a trail byte within a UTF-8 character
 633                                 if (moveDir > 0)
 634                                         pos = endUTF;
 635                                 else
 636                                         pos = startUTF;
 637                         }
 638                 } else {
 639                         if (moveDir > 0) {
 640                                 int mbsize = IsDBCSLeadByte(cb.CharAt(pos)) ? 2 : 1;
 641                                 pos += mbsize;
 642                                 if (pos > Length())
 643                                         pos = Length();
 644                         } else {
 645                                 // Anchor DBCS calculations at start of line because start of line can
 646                                 // not be a DBCS trail byte.
 647                                 int posStartLine = LineStart(LineFromPosition(pos));
 648                                 // See http://msdn.microsoft.com/en-us/library/cc194792%28v=MSDN.10%29.aspx
 649                                 // http://msdn.microsoft.com/en-us/library/cc194790.aspx
 650                                 if ((pos - 1) <= posStartLine) {
 651                                         return pos - 1;
 652                                 } else if (IsDBCSLeadByte(cb.CharAt(pos - 1))) {
 653                                         // Must actually be trail byte
 654                                         return pos - 2;
 655                                 } else {
 656                                         // Otherwise, step back until a non-lead-byte is found.
 657                                         int posTemp = pos - 1;
 658                                         while (posStartLine <= --posTemp && IsDBCSLeadByte(cb.CharAt(posTemp)))
 659                                                 ;
 660                                         // Now posTemp+1 must point to the beginning of a character,
 661                                         // so figure out whether we went back an even or an odd
 662                                         // number of bytes and go back 1 or 2 bytes, respectively.
 663                                         return (pos - 1 - ((pos - posTemp) & 1));
 664                                 }
 665                         }
 666                 }
 667         } else {
 668                 pos += increment;
 669         }
 670
 671         return pos;
 672 }
 673
 674 bool Document::NextCharacter(int &pos, int moveDir) {
 675         // Returns true if pos changed
 676         int posNext = NextPosition(pos, moveDir);
 677         if (posNext == pos) {
 678                 return false;
 679         } else {
 680                 pos = posNext;
 681                 return true;
 682         }
 683 }
 684
 685 int SCI_METHOD Document::CodePage() const {
 686         return dbcsCodePage;
 687 }
 688
 689 bool SCI_METHOD Document::IsDBCSLeadByte(char ch) const {
 690         // Byte ranges found in Wikipedia articles with relevant search strings in each case
 691         unsigned char uch = static_cast<unsigned char>(ch);
 692         switch (dbcsCodePage) {
 693                 case 932:
 694                         // Shift_jis
 695                         return ((uch >= 0x81) && (uch <= 0x9F)) ||
 696                                 ((uch >= 0xE0) && (uch <= 0xEF));
 697                 case 936:
 698                         // GBK
 699                         return (uch >= 0x81) && (uch <= 0xFE);
 700                 case 949:
 701                         // Korean Wansung KS C-5601-1987
 702                         return (uch >= 0x81) && (uch <= 0xFE);
 703                 case 950:
 704                         // Big5
 705                         return (uch >= 0x81) && (uch <= 0xFE);
 706                 case 1361:
 707                         // Korean Johab KS C-5601-1992
 708                         return
 709                                 ((uch >= 0x84) && (uch <= 0xD3)) ||
 710                                 ((uch >= 0xD8) && (uch <= 0xDE)) ||
 711                                 ((uch >= 0xE0) && (uch <= 0xF9));
 712         }
 713         return false;
 714 }
 715
 716 inline bool IsSpaceOrTab(int ch) {
 717         return ch == ' ' || ch == '\t';
 718 }
 719
 720 // Need to break text into segments near lengthSegment but taking into
 721 // account the encoding to not break inside a UTF-8 or DBCS character
 722 // and also trying to avoid breaking inside a pair of combining characters.
 723 // The segment length must always be long enough (more than 4 bytes)
 724 // so that there will be at least one whole character to make a segment.
 725 // For UTF-8, text must consist only of valid whole characters.
 726 // In preference order from best to worst:
 727 //   1) Break after space
 728 //   2) Break before punctuation
 729 //   3) Break after whole character
 730
 731 int Document::SafeSegment(const char *text, int length, int lengthSegment) {
 732         if (length <= lengthSegment)
 733                 return length;
 734         int lastSpaceBreak = -1;
 735         int lastPunctuationBreak = -1;
 736         int lastEncodingAllowedBreak = -1;
 737         for (int j=0; j < lengthSegment;) {
 738                 unsigned char ch = static_cast<unsigned char>(text[j]);
 739                 if (j > 0) {
 740                         if (IsSpaceOrTab(text[j - 1]) && !IsSpaceOrTab(text[j])) {
 741                                 lastSpaceBreak = j;
 742                         }
 743                         if (ch < 'A') {
 744                                 lastPunctuationBreak = j;
 745                         }
 746                 }
 747                 lastEncodingAllowedBreak = j;
 748
 749                 if (dbcsCodePage == SC_CP_UTF8) {
 750                         j += (ch < 0x80) ? 1 : BytesFromLead(ch);
 751                 } else if (dbcsCodePage) {
 752                         j += IsDBCSLeadByte(ch) ? 2 : 1;
 753                 } else {
 754                         j++;
 755                 }
 756         }
 757         if (lastSpaceBreak >= 0) {
 758                 return lastSpaceBreak;
 759         } else if (lastPunctuationBreak >= 0) {
 760                 return lastPunctuationBreak;
 761         }
 762         return lastEncodingAllowedBreak;
 763 }
 764
 765 void Document::ModifiedAt(int pos) {
 766         if (endStyled > pos)
 767                 endStyled = pos;
 768 }
 769
 770 void Document::CheckReadOnly() {
 771         if (cb.IsReadOnly() && enteredReadOnlyCount == 0) {
 772                 enteredReadOnlyCount++;
 773                 NotifyModifyAttempt();
 774                 enteredReadOnlyCount--;
 775         }
 776 }
 777
 778 // Document only modified by gateways DeleteChars, InsertString, Undo, Redo, and SetStyleAt.
 779 // SetStyleAt does not change the persistent state of a document
 780
 781 bool Document::DeleteChars(int pos, int len) {
 782         if (len == 0)
 783                 return false;
 784         if ((pos + len) > Length())
 785                 return false;
 786         CheckReadOnly();
 787         if (enteredModification != 0) {
 788                 return false;
 789         } else {
 790                 enteredModification++;
 791                 if (!cb.IsReadOnly()) {
 792                         NotifyModified(
 793                             DocModification(
 794                                 SC_MOD_BEFOREDELETE | SC_PERFORMED_USER,
 795                                 pos, len,
 796                                 0, 0));
 797                         int prevLinesTotal = LinesTotal();
 798                         bool startSavePoint = cb.IsSavePoint();
 799                         bool startSequence = false;
 800                         const char *text = cb.DeleteChars(pos, len, startSequence);
 801                         if (startSavePoint && cb.IsCollectingUndo())
 802                                 NotifySavePoint(!startSavePoint);
 803                         if ((pos < Length()) || (pos == 0))
 804                                 ModifiedAt(pos);
 805                         else
 806                                 ModifiedAt(pos-1);
 807                         NotifyModified(
 808                             DocModification(
 809                                 SC_MOD_DELETETEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 810                                 pos, len,
 811                                 LinesTotal() - prevLinesTotal, text));
 812                 }
 813                 enteredModification--;
 814         }
 815         return !cb.IsReadOnly();
 816 }
 817
 818 /**
 819  * Insert a string with a length.
 820  */
 821 bool Document::InsertString(int position, const char *s, int insertLength) {
 822         if (insertLength <= 0) {
 823                 return false;
 824         }
 825         CheckReadOnly();
 826         if (enteredModification != 0) {
 827                 return false;
 828         } else {
 829                 enteredModification++;
 830                 if (!cb.IsReadOnly()) {
 831                         NotifyModified(
 832                             DocModification(
 833                                 SC_MOD_BEFOREINSERT | SC_PERFORMED_USER,
 834                                 position, insertLength,
 835                                 0, s));
 836                         int prevLinesTotal = LinesTotal();
 837                         bool startSavePoint = cb.IsSavePoint();
 838                         bool startSequence = false;
 839                         const char *text = cb.InsertString(position, s, insertLength, startSequence);
 840                         if (startSavePoint && cb.IsCollectingUndo())
 841                                 NotifySavePoint(!startSavePoint);
 842                         ModifiedAt(position);
 843                         NotifyModified(
 844                             DocModification(
 845                                 SC_MOD_INSERTTEXT | SC_PERFORMED_USER | (startSequence?SC_STARTACTION:0),
 846                                 position, insertLength,
 847                                 LinesTotal() - prevLinesTotal, text));
 848                 }
 849                 enteredModification--;
 850         }
 851         return !cb.IsReadOnly();
 852 }
 853
 854 int Document::Undo() {
 855         int newPos = -1;
 856         CheckReadOnly();
 857         if (enteredModification == 0) {
 858                 enteredModification++;
 859                 if (!cb.IsReadOnly()) {
 860                         bool startSavePoint = cb.IsSavePoint();
 861                         bool multiLine = false;
 862                         int steps = cb.StartUndo();
 863                         //Platform::DebugPrintf("Steps=%d\n", steps);
 864                         for (int step = 0; step < steps; step++) {
 865                                 const int prevLinesTotal = LinesTotal();
 866                                 const Action &action = cb.GetUndoStep();
 867                                 if (action.at == removeAction) {
 868                                         NotifyModified(DocModification(
 869                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_UNDO, action));
 870                                 } else if (action.at == containerAction) {
 871                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_UNDO);
 872                                         dm.token = action.position;
 873                                         NotifyModified(dm);
 874                                 } else {
 875                                         NotifyModified(DocModification(
 876                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_UNDO, action));
 877                                 }
 878                                 cb.PerformUndoStep();
 879                                 int cellPosition = action.position;
 880                                 if (action.at != containerAction) {
 881                                         ModifiedAt(cellPosition);
 882                                         newPos = cellPosition;
 883                                 }
 884
 885                                 int modFlags = SC_PERFORMED_UNDO;
 886                                 // With undo, an insertion action becomes a deletion notification
 887                                 if (action.at == removeAction) {
 888                                         newPos += action.lenData;
 889                                         modFlags |= SC_MOD_INSERTTEXT;
 890                                 } else if (action.at == insertAction) {
 891                                         modFlags |= SC_MOD_DELETETEXT;
 892                                 }
 893                                 if (steps > 1)
 894                                         modFlags |= SC_MULTISTEPUNDOREDO;
 895                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 896                                 if (linesAdded != 0)
 897                                         multiLine = true;
 898                                 if (step == steps - 1) {
 899                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 900                                         if (multiLine)
 901                                                 modFlags |= SC_MULTILINEUNDOREDO;
 902                                 }
 903                                 NotifyModified(DocModification(modFlags, cellPosition, action.lenData,
 904                                                                                            linesAdded, action.data));
 905                         }
 906
 907                         bool endSavePoint = cb.IsSavePoint();
 908                         if (startSavePoint != endSavePoint)
 909                                 NotifySavePoint(endSavePoint);
 910                 }
 911                 enteredModification--;
 912         }
 913         return newPos;
 914 }
 915
 916 int Document::Redo() {
 917         int newPos = -1;
 918         CheckReadOnly();
 919         if (enteredModification == 0) {
 920                 enteredModification++;
 921                 if (!cb.IsReadOnly()) {
 922                         bool startSavePoint = cb.IsSavePoint();
 923                         bool multiLine = false;
 924                         int steps = cb.StartRedo();
 925                         for (int step = 0; step < steps; step++) {
 926                                 const int prevLinesTotal = LinesTotal();
 927                                 const Action &action = cb.GetRedoStep();
 928                                 if (action.at == insertAction) {
 929                                         NotifyModified(DocModification(
 930                                                                         SC_MOD_BEFOREINSERT | SC_PERFORMED_REDO, action));
 931                                 } else if (action.at == containerAction) {
 932                                         DocModification dm(SC_MOD_CONTAINER | SC_PERFORMED_REDO);
 933                                         dm.token = action.position;
 934                                         NotifyModified(dm);
 935                                 } else {
 936                                         NotifyModified(DocModification(
 937                                                                         SC_MOD_BEFOREDELETE | SC_PERFORMED_REDO, action));
 938                                 }
 939                                 cb.PerformRedoStep();
 940                                 if (action.at != containerAction) {
 941                                         ModifiedAt(action.position);
 942                                         newPos = action.position;
 943                                 }
 944
 945                                 int modFlags = SC_PERFORMED_REDO;
 946                                 if (action.at == insertAction) {
 947                                         newPos += action.lenData;
 948                                         modFlags |= SC_MOD_INSERTTEXT;
 949                                 } else if (action.at == removeAction) {
 950                                         modFlags |= SC_MOD_DELETETEXT;
 951                                 }
 952                                 if (steps > 1)
 953                                         modFlags |= SC_MULTISTEPUNDOREDO;
 954                                 const int linesAdded = LinesTotal() - prevLinesTotal;
 955                                 if (linesAdded != 0)
 956                                         multiLine = true;
 957                                 if (step == steps - 1) {
 958                                         modFlags |= SC_LASTSTEPINUNDOREDO;
 959                                         if (multiLine)
 960                                                 modFlags |= SC_MULTILINEUNDOREDO;
 961                                 }
 962                                 NotifyModified(
 963                                         DocModification(modFlags, action.position, action.lenData,
 964                                                                         linesAdded, action.data));
 965                         }
 966
 967                         bool endSavePoint = cb.IsSavePoint();
 968                         if (startSavePoint != endSavePoint)
 969                                 NotifySavePoint(endSavePoint);
 970                 }
 971                 enteredModification--;
 972         }
 973         return newPos;
 974 }
 975
 976 /**
 977  * Insert a single character.
 978  */
 979 bool Document::InsertChar(int pos, char ch) {
 980         char chs[1];
 981         chs[0] = ch;
 982         return InsertString(pos, chs, 1);
 983 }
 984
 985 /**
 986  * Insert a null terminated string.
 987  */
 988 bool Document::InsertCString(int position, const char *s) {
 989         return InsertString(position, s, strlen(s));
 990 }
 991
 992 void Document::ChangeChar(int pos, char ch) {
 993         DeleteChars(pos, 1);
 994         InsertChar(pos, ch);
 995 }
 996
 997 void Document::DelChar(int pos) {
 998         DeleteChars(pos, LenChar(pos));
 999 }
1000
1001 void Document::DelCharBack(int pos) {
1002         if (pos <= 0) {
1003                 return;
1004         } else if (IsCrLf(pos - 2)) {
1005                 DeleteChars(pos - 2, 2);
1006         } else if (dbcsCodePage) {
1007                 int startChar = NextPosition(pos, -1);
1008                 DeleteChars(startChar, pos - startChar);
1009         } else {
1010                 DeleteChars(pos - 1, 1);
1011         }
1012 }
1013
/** True for the two characters that can make up indentation: space and tab. */
static bool isindentchar(char ch) {
	switch (ch) {
		case ' ':
		case '\t':
			return true;
		default:
			return false;
	}
}
1017
/**
 * Return the first multiple of tabSize that is strictly greater than the
 * tab stop at or before pos. tabSize must not be zero.
 */
static int NextTab(int pos, int tabSize) {
	const int tabStopsPassed = pos / tabSize;
	const int nextStop = tabStopsPassed + 1;
	return nextStop * tabSize;
}
1021
/**
 * Fill linebuf with a NUL terminated run of white space that is indent columns wide.
 * Tabs worth tabSize columns each are used first unless insertSpaces is set,
 * and spaces make up the remainder. At most length - 1 characters are
 * written before the terminator, so wider indentation is cut short.
 */
static void CreateIndentation(char *linebuf, int length, int indent, int tabSize, bool insertSpaces) {
	int room = length - 1;	// keep one cell for the terminating \0
	char *out = linebuf;
	if (!insertSpaces) {
		for (; (indent >= tabSize) && (room > 0); room--) {
			*out++ = '\t';
			indent -= tabSize;
		}
	}
	for (; (indent > 0) && (room > 0); room--, indent--) {
		*out++ = ' ';
	}
	*out = '\0';
}
1038
1039 int SCI_METHOD Document::GetLineIndentation(int line) {
1040         int indent = 0;
1041         if ((line >= 0) && (line < LinesTotal())) {
1042                 int lineStart = LineStart(line);
1043                 int length = Length();
1044                 for (int i = lineStart; i < length; i++) {
1045                         char ch = cb.CharAt(i);
1046                         if (ch == ' ')
1047                                 indent++;
1048                         else if (ch == '\t')
1049                                 indent = NextTab(indent, tabInChars);
1050                         else
1051                                 return indent;
1052                 }
1053         }
1054         return indent;
1055 }
1056
1057 void Document::SetLineIndentation(int line, int indent) {
1058         int indentOfLine = GetLineIndentation(line);
1059         if (indent < 0)
1060                 indent = 0;
1061         if (indent != indentOfLine) {
1062                 char linebuf[1000];
1063                 CreateIndentation(linebuf, sizeof(linebuf), indent, tabInChars, !useTabs);
1064                 int thisLineStart = LineStart(line);
1065                 int indentPos = GetLineIndentPosition(line);
1066                 UndoGroup ug(this);
1067                 DeleteChars(thisLineStart, indentPos - thisLineStart);
1068                 InsertCString(thisLineStart, linebuf);
1069         }
1070 }
1071
1072 int Document::GetLineIndentPosition(int line) const {
1073         if (line < 0)
1074                 return 0;
1075         int pos = LineStart(line);
1076         int length = Length();
1077         while ((pos < length) && isindentchar(cb.CharAt(pos))) {
1078                 pos++;
1079         }
1080         return pos;
1081 }
1082
1083 int Document::GetColumn(int pos) {
1084         int column = 0;
1085         int line = LineFromPosition(pos);
1086         if ((line >= 0) && (line < LinesTotal())) {
1087                 for (int i = LineStart(line); i < pos;) {
1088                         char ch = cb.CharAt(i);
1089                         if (ch == '\t') {
1090                                 column = NextTab(column, tabInChars);
1091                                 i++;
1092                         } else if (ch == '\r') {
1093                                 return column;
1094                         } else if (ch == '\n') {
1095                                 return column;
1096                         } else if (i >= Length()) {
1097                                 return column;
1098                         } else {
1099                                 column++;
1100                                 i = NextPosition(i, 1);
1101                         }
1102                 }
1103         }
1104         return column;
1105 }
1106
1107 int Document::FindColumn(int line, int column) {
1108         int position = LineStart(line);
1109         if ((line >= 0) && (line < LinesTotal())) {
1110                 int columnCurrent = 0;
1111                 while ((columnCurrent < column) && (position < Length())) {
1112                         char ch = cb.CharAt(position);
1113                         if (ch == '\t') {
1114                                 columnCurrent = NextTab(columnCurrent, tabInChars);
1115                                 position++;
1116                         } else if (ch == '\r') {
1117                                 return position;
1118                         } else if (ch == '\n') {
1119                                 return position;
1120                         } else {
1121                                 columnCurrent++;
1122                                 position = NextPosition(position, 1);
1123                         }
1124                 }
1125         }
1126         return position;
1127 }
1128
1129 void Document::Indent(bool forwards, int lineBottom, int lineTop) {
1130         // Dedent - suck white space off the front of the line to dedent by equivalent of a tab
1131         for (int line = lineBottom; line >= lineTop; line--) {
1132                 int indentOfLine = GetLineIndentation(line);
1133                 if (forwards) {
1134                         if (LineStart(line) < LineEnd(line)) {
1135                                 SetLineIndentation(line, indentOfLine + IndentSize());
1136                         }
1137                 } else {
1138                         SetLineIndentation(line, indentOfLine - IndentSize());
1139                 }
1140         }
1141 }
1142
1143 // Convert line endings for a piece of text to a particular mode.
1144 // Stop at len or when a NUL is found.
1145 // Caller must delete the returned pointer.
1146 char *Document::TransformLineEnds(int *pLenOut, const char *s, size_t len, int eolMode) {
1147         char *dest = new char[2 * len + 1];
1148         const char *sptr = s;
1149         char *dptr = dest;
1150         for (size_t i = 0; (i < len) && (*sptr != '\0'); i++) {
1151                 if (*sptr == '\n' || *sptr == '\r') {
1152                         if (eolMode == SC_EOL_CR) {
1153                                 *dptr++ = '\r';
1154                         } else if (eolMode == SC_EOL_LF) {
1155                                 *dptr++ = '\n';
1156                         } else { // eolMode == SC_EOL_CRLF
1157                                 *dptr++ = '\r';
1158                                 *dptr++ = '\n';
1159                         }
1160                         if ((*sptr == '\r') && (i+1 < len) && (*(sptr+1) == '\n')) {
1161                                 i++;
1162                                 sptr++;
1163                         }
1164                         sptr++;
1165                 } else {
1166                         *dptr++ = *sptr++;
1167                 }
1168         }
1169         *dptr++ = '\0';
1170         *pLenOut = (dptr - dest) - 1;
1171         return dest;
1172 }
1173
1174 void Document::ConvertLineEnds(int eolModeSet) {
1175         UndoGroup ug(this);
1176
1177         for (int pos = 0; pos < Length(); pos++) {
1178                 if (cb.CharAt(pos) == '\r') {
1179                         if (cb.CharAt(pos + 1) == '\n') {
1180                                 // CRLF
1181                                 if (eolModeSet == SC_EOL_CR) {
1182                                         DeleteChars(pos + 1, 1); // Delete the LF
1183                                 } else if (eolModeSet == SC_EOL_LF) {
1184                                         DeleteChars(pos, 1); // Delete the CR
1185                                 } else {
1186                                         pos++;
1187                                 }
1188                         } else {
1189                                 // CR
1190                                 if (eolModeSet == SC_EOL_CRLF) {
1191                                         InsertString(pos + 1, "\n", 1); // Insert LF
1192                                         pos++;
1193                                 } else if (eolModeSet == SC_EOL_LF) {
1194                                         InsertString(pos, "\n", 1); // Insert LF
1195                                         DeleteChars(pos + 1, 1); // Delete CR
1196                                 }
1197                         }
1198                 } else if (cb.CharAt(pos) == '\n') {
1199                         // LF
1200                         if (eolModeSet == SC_EOL_CRLF) {
1201                                 InsertString(pos, "\r", 1); // Insert CR
1202                                 pos++;
1203                         } else if (eolModeSet == SC_EOL_CR) {
1204                                 InsertString(pos, "\r", 1); // Insert CR
1205                                 DeleteChars(pos + 1, 1); // Delete LF
1206                         }
1207                 }
1208         }
1209
1210 }
1211
1212 bool Document::IsWhiteLine(int line) const {
1213         int currentChar = LineStart(line);
1214         int endLine = LineEnd(line);
1215         while (currentChar < endLine) {
1216                 if (cb.CharAt(currentChar) != ' ' && cb.CharAt(currentChar) != '\t') {
1217                         return false;
1218                 }
1219                 ++currentChar;
1220         }
1221         return true;
1222 }
1223
1224 int Document::ParaUp(int pos) {
1225         int line = LineFromPosition(pos);
1226         line--;
1227         while (line >= 0 && IsWhiteLine(line)) { // skip empty lines
1228                 line--;
1229         }
1230         while (line >= 0 && !IsWhiteLine(line)) { // skip non-empty lines
1231                 line--;
1232         }
1233         line++;
1234         return LineStart(line);
1235 }
1236
1237 int Document::ParaDown(int pos) {
1238         int line = LineFromPosition(pos);
1239         while (line < LinesTotal() && !IsWhiteLine(line)) { // skip non-empty lines
1240                 line++;
1241         }
1242         while (line < LinesTotal() && IsWhiteLine(line)) { // skip empty lines
1243                 line++;
1244         }
1245         if (line < LinesTotal())
1246                 return LineStart(line);
1247         else // end of a document
1248                 return LineEnd(line-1);
1249 }
1250
1251 CharClassify::cc Document::WordCharClass(unsigned char ch) {
1252         if ((SC_CP_UTF8 == dbcsCodePage) && (ch >= 0x80))
1253                 return CharClassify::ccWord;
1254         return charClass.GetClass(ch);
1255 }
1256
1257 /**
1258  * Used by commmands that want to select whole words.
1259  * Finds the start of word at pos when delta < 0 or the end of the word when delta >= 0.
1260  */
1261 int Document::ExtendWordSelect(int pos, int delta, bool onlyWordCharacters) {
1262         CharClassify::cc ccStart = CharClassify::ccWord;
1263         if (delta < 0) {
1264                 if (!onlyWordCharacters)
1265                         ccStart = WordCharClass(cb.CharAt(pos-1));
1266                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart))
1267                         pos--;
1268         } else {
1269                 if (!onlyWordCharacters && pos < Length())
1270                         ccStart = WordCharClass(cb.CharAt(pos));
1271                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1272                         pos++;
1273         }
1274         return MovePositionOutsideChar(pos, delta, true);
1275 }
1276
1277 /**
1278  * Find the start of the next word in either a forward (delta >= 0) or backwards direction
1279  * (delta < 0).
1280  * This is looking for a transition between character classes although there is also some
1281  * additional movement to transit white space.
1282  * Used by cursor movement by word commands.
1283  */
1284 int Document::NextWordStart(int pos, int delta) {
1285         if (delta < 0) {
1286                 while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace))
1287                         pos--;
1288                 if (pos > 0) {
1289                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1290                         while (pos > 0 && (WordCharClass(cb.CharAt(pos - 1)) == ccStart)) {
1291                                 pos--;
1292                         }
1293                 }
1294         } else {
1295                 CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1296                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == ccStart))
1297                         pos++;
1298                 while (pos < (Length()) && (WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace))
1299                         pos++;
1300         }
1301         return pos;
1302 }
1303
1304 /**
1305  * Find the end of the next word in either a forward (delta >= 0) or backwards direction
1306  * (delta < 0).
1307  * This is looking for a transition between character classes although there is also some
1308  * additional movement to transit white space.
1309  * Used by cursor movement by word commands.
1310  */
1311 int Document::NextWordEnd(int pos, int delta) {
1312         if (delta < 0) {
1313                 if (pos > 0) {
1314                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos-1));
1315                         if (ccStart != CharClassify::ccSpace) {
1316                                 while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == ccStart) {
1317                                         pos--;
1318                                 }
1319                         }
1320                         while (pos > 0 && WordCharClass(cb.CharAt(pos - 1)) == CharClassify::ccSpace) {
1321                                 pos--;
1322                         }
1323                 }
1324         } else {
1325                 while (pos < Length() && WordCharClass(cb.CharAt(pos)) == CharClassify::ccSpace) {
1326                         pos++;
1327                 }
1328                 if (pos < Length()) {
1329                         CharClassify::cc ccStart = WordCharClass(cb.CharAt(pos));
1330                         while (pos < Length() && WordCharClass(cb.CharAt(pos)) == ccStart) {
1331                                 pos++;
1332                         }
1333                 }
1334         }
1335         return pos;
1336 }
1337
1338 /**
1339  * Check that the character at the given position is a word or punctuation character and that
1340  * the previous character is of a different character class.
1341  */
1342 bool Document::IsWordStartAt(int pos) {
1343         if (pos > 0) {
1344                 CharClassify::cc ccPos = WordCharClass(CharAt(pos));
1345                 return (ccPos == CharClassify::ccWord || ccPos == CharClassify::ccPunctuation) &&
1346                         (ccPos != WordCharClass(CharAt(pos - 1)));
1347         }
1348         return true;
1349 }
1350
1351 /**
1352  * Check that the character at the given position is a word or punctuation character and that
1353  * the next character is of a different character class.
1354  */
1355 bool Document::IsWordEndAt(int pos) {
1356         if (pos < Length()) {
1357                 CharClassify::cc ccPrev = WordCharClass(CharAt(pos-1));
1358                 return (ccPrev == CharClassify::ccWord || ccPrev == CharClassify::ccPunctuation) &&
1359                         (ccPrev != WordCharClass(CharAt(pos)));
1360         }
1361         return true;
1362 }
1363
1364 /**
1365  * Check that the given range is has transitions between character classes at both
1366  * ends and where the characters on the inside are word or punctuation characters.
1367  */
1368 bool Document::IsWordAt(int start, int end) {
1369         return IsWordStartAt(start) && IsWordEndAt(end);
1370 }
1371
/** Map 'A'..'Z' to 'a'..'z' and return every other value unchanged. */
static inline char MakeLowerCase(char ch) {
	const bool isUpper = (ch >= 'A') && (ch <= 'Z');
	if (isUpper)
		return static_cast<char>(ch - 'A' + 'a');
	return ch;
}
1378
/** True when v lies in 0x80..0xBF, the range tested for bytes following a lead byte. */
static bool GoodTrailByte(int v) {
	if (v < 0x80)
		return false;
	return v < 0xc0;
}
1382
1383 size_t Document::ExtractChar(int pos, char *bytes) {
1384         unsigned char ch = static_cast<unsigned char>(cb.CharAt(pos));
1385         size_t widthChar = UTF8CharLength(ch);
1386         bytes[0] = ch;
1387         for (size_t i=1; i<widthChar; i++) {
1388                 bytes[i] = cb.CharAt(pos+i);
1389                 if (!GoodTrailByte(static_cast<unsigned char>(bytes[i]))) { // Bad byte
1390                         widthChar = 1;
1391                 }
1392         }
1393         return widthChar;
1394 }
1395
1396 CaseFolderTable::CaseFolderTable() {
1397         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1398                 mapping[iChar] = static_cast<char>(iChar);
1399         }
1400 }
1401
1402 CaseFolderTable::~CaseFolderTable() {
1403 }
1404
1405 size_t CaseFolderTable::Fold(char *folded, size_t sizeFolded, const char *mixed, size_t lenMixed) {
1406         if (lenMixed > sizeFolded) {
1407                 return 0;
1408         } else {
1409                 for (size_t i=0; i<lenMixed; i++) {
1410                         folded[i] = mapping[static_cast<unsigned char>(mixed[i])];
1411                 }
1412                 return lenMixed;
1413         }
1414 }
1415
1416 void CaseFolderTable::SetTranslation(char ch, char chTranslation) {
1417         mapping[static_cast<unsigned char>(ch)] = chTranslation;
1418 }
1419
1420 void CaseFolderTable::StandardASCII() {
1421         for (size_t iChar=0; iChar<sizeof(mapping); iChar++) {
1422                 if (iChar >= 'A' && iChar <= 'Z') {
1423                         mapping[iChar] = static_cast<char>(iChar - 'A' + 'a');
1424                 } else {
1425                         mapping[iChar] = static_cast<char>(iChar);
1426                 }
1427         }
1428 }
1429
1430 bool Document::MatchesWordOptions(bool word, bool wordStart, int pos, int length) {
1431         return (!word && !wordStart) ||
1432                         (word && IsWordAt(pos, pos + length)) ||
1433                         (wordStart && IsWordStartAt(pos));
1434 }
1435
1436 /**
1437  * Find text in document, supporting both forward and backward
1438  * searches (just pass minPos > maxPos to do a backward search)
1439  * Has not been tested with backwards DBCS searches yet.
1440  */
1441 long Document::FindText(int minPos, int maxPos, const char *search,
1442                         bool caseSensitive, bool word, bool wordStart, bool regExp, int flags,
1443                         int *length, CaseFolder *pcf) {
1444         if (*length <= 0)
1445                 return minPos;
1446         if (regExp) {
1447                 if (!regex)
1448                         regex = CreateRegexSearch(&charClass);
1449                 return regex->FindText(this, minPos, maxPos, search, caseSensitive, word, wordStart, flags, length);
1450         } else {
1451
1452                 const bool forward = minPos <= maxPos;
1453                 const int increment = forward ? 1 : -1;
1454
1455                 // Range endpoints should not be inside DBCS characters, but just in case, move them.
1456                 const int startPos = MovePositionOutsideChar(minPos, increment, false);
1457                 const int endPos = MovePositionOutsideChar(maxPos, increment, false);
1458
1459                 // Compute actual search ranges needed
1460                 const int lengthFind = (*length == -1) ? static_cast<int>(strlen(search)) : *length;
1461                 const int endSearch = (startPos <= endPos) ? endPos - lengthFind + 1 : endPos;
1462
1463                 //Platform::DebugPrintf("Find %d %d %s %d\n", startPos, endPos, ft->lpstrText, lengthFind);
1464                 const int limitPos = Platform::Maximum(startPos, endPos);
1465                 int pos = startPos;
1466                 if (!forward) {
1467                         // Back all of a character
1468                         pos = NextPosition(pos, increment);
1469                 }
1470                 if (caseSensitive) {
1471                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1472                                 bool found = (pos + lengthFind) <= limitPos;
1473                                 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1474                                         found = CharAt(pos + indexSearch) == search[indexSearch];
1475                                 }
1476                                 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1477                                         return pos;
1478                                 }
1479                                 if (!NextCharacter(pos, increment))
1480                                         break;
1481                         }
1482                 } else if (SC_CP_UTF8 == dbcsCodePage) {
1483                         const size_t maxBytesCharacter = 4;
1484                         const size_t maxFoldingExpansion = 4;
1485                         std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1486                         const int lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1487                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1488                                 int widthFirstCharacter = 0;
1489                                 int indexDocument = 0;
1490                                 int indexSearch = 0;
1491                                 bool characterMatches = true;
1492                                 while (characterMatches &&
1493                                         ((pos + indexDocument) < limitPos) &&
1494                                         (indexSearch < lenSearch)) {
1495                                         char bytes[maxBytesCharacter + 1];
1496                                         bytes[maxBytesCharacter] = 0;
1497                                         const int widthChar = ExtractChar(pos + indexDocument, bytes);
1498                                         if (!widthFirstCharacter)
1499                                                 widthFirstCharacter = widthChar;
1500                                         char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1501                                         const int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
1502                                         folded[lenFlat] = 0;
1503                                         // Does folded match the buffer
1504                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1505                                         indexDocument += widthChar;
1506                                         indexSearch += lenFlat;
1507                                 }
1508                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1509                                         if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1510                                                 *length = indexDocument;
1511                                                 return pos;
1512                                         }
1513                                 }
1514                                 if (forward) {
1515                                         pos += widthFirstCharacter;
1516                                 } else {
1517                                         if (!NextCharacter(pos, increment))
1518                                                 break;
1519                                 }
1520                         }
1521                 } else if (dbcsCodePage) {
1522                         const size_t maxBytesCharacter = 2;
1523                         const size_t maxFoldingExpansion = 4;
1524                         std::vector<char> searchThing(lengthFind * maxBytesCharacter * maxFoldingExpansion + 1);
1525                         const int lenSearch = pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1526                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1527                                 int indexDocument = 0;
1528                                 int indexSearch = 0;
1529                                 bool characterMatches = true;
1530                                 while (characterMatches &&
1531                                         ((pos + indexDocument) < limitPos) &&
1532                                         (indexSearch < lenSearch)) {
1533                                         char bytes[maxBytesCharacter + 1];
1534                                         bytes[0] = cb.CharAt(pos + indexDocument);
1535                                         const int widthChar = IsDBCSLeadByte(bytes[0]) ? 2 : 1;
1536                                         if (widthChar == 2)
1537                                                 bytes[1] = cb.CharAt(pos + indexDocument + 1);
1538                                         char folded[maxBytesCharacter * maxFoldingExpansion + 1];
1539                                         const int lenFlat = pcf->Fold(folded, sizeof(folded), bytes, widthChar);
1540                                         folded[lenFlat] = 0;
1541                                         // Does folded match the buffer
1542                                         characterMatches = 0 == memcmp(folded, &searchThing[0] + indexSearch, lenFlat);
1543                                         indexDocument += widthChar;
1544                                         indexSearch += lenFlat;
1545                                 }
1546                                 if (characterMatches && (indexSearch == static_cast<int>(lenSearch))) {
1547                                         if (MatchesWordOptions(word, wordStart, pos, indexDocument)) {
1548                                                 *length = indexDocument;
1549                                                 return pos;
1550                                         }
1551                                 }
1552                                 if (!NextCharacter(pos, increment))
1553                                         break;
1554                         }
1555                 } else {
1556                         CaseFolderTable caseFolder;
1557                         std::vector<char> searchThing(lengthFind + 1);
1558                         pcf->Fold(&searchThing[0], searchThing.size(), search, lengthFind);
1559                         while (forward ? (pos < endSearch) : (pos >= endSearch)) {
1560                                 bool found = (pos + lengthFind) <= limitPos;
1561                                 for (int indexSearch = 0; (indexSearch < lengthFind) && found; indexSearch++) {
1562                                         char ch = CharAt(pos + indexSearch);
1563                                         char folded[2];
1564                                         pcf->Fold(folded, sizeof(folded), &ch, 1);
1565                                         found = folded[0] == searchThing[indexSearch];
1566                                 }
1567                                 if (found && MatchesWordOptions(word, wordStart, pos, lengthFind)) {
1568                                         return pos;
1569                                 }
1570                                 if (!NextCharacter(pos, increment))
1571                                         break;
1572                         }
1573                 }
1574         }
1575         //Platform::DebugPrintf("Not found\n");
1576         return -1;
1577 }
1578
1579 const char *Document::SubstituteByPosition(const char *text, int *length) {
1580         if (regex)
1581                 return regex->SubstituteByPosition(this, text, length);
1582         else
1583                 return 0;
1584 }
1585
1586 int Document::LinesTotal() const {
1587         return cb.Lines();
1588 }
1589
1590 void Document::ChangeCase(Range r, bool makeUpperCase) {
1591         for (int pos = r.start; pos < r.end;) {
1592                 int len = LenChar(pos);
1593                 if (len == 1) {
1594                         char ch = CharAt(pos);
1595                         if (makeUpperCase) {
1596                                 if (IsLowerCase(ch)) {
1597                                         ChangeChar(pos, static_cast<char>(MakeUpperCase(ch)));
1598                                 }
1599                         } else {
1600                                 if (IsUpperCase(ch)) {
1601                                         ChangeChar(pos, static_cast<char>(MakeLowerCase(ch)));
1602                                 }
1603                         }
1604                 }
1605                 pos += len;
1606         }
1607 }
1608
1609 void Document::SetDefaultCharClasses(bool includeWordClass) {
1610     charClass.SetDefaultCharClasses(includeWordClass);
1611 }
1612
1613 void Document::SetCharClasses(const unsigned char *chars, CharClassify::cc newCharClass) {
1614     charClass.SetCharClasses(chars, newCharClass);
1615 }
1616
1617 void Document::SetStylingBits(int bits) {
1618         stylingBits = bits;
1619         stylingBitsMask = (1 << stylingBits) - 1;
1620 }
1621
1622 void SCI_METHOD Document::StartStyling(int position, char mask) {
1623         stylingMask = mask;
1624         endStyled = position;
1625 }
1626
1627 bool SCI_METHOD Document::SetStyleFor(int length, char style) {
1628         if (enteredStyling != 0) {
1629                 return false;
1630         } else {
1631                 enteredStyling++;
1632                 style &= stylingMask;
1633                 int prevEndStyled = endStyled;
1634                 if (cb.SetStyleFor(endStyled, length, style, stylingMask)) {
1635                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1636                                            prevEndStyled, length);
1637                         NotifyModified(mh);
1638                 }
1639                 endStyled += length;
1640                 enteredStyling--;
1641                 return true;
1642         }
1643 }
1644
1645 bool SCI_METHOD Document::SetStyles(int length, const char *styles) {
1646         if (enteredStyling != 0) {
1647                 return false;
1648         } else {
1649                 enteredStyling++;
1650                 bool didChange = false;
1651                 int startMod = 0;
1652                 int endMod = 0;
1653                 for (int iPos = 0; iPos < length; iPos++, endStyled++) {
1654                         PLATFORM_ASSERT(endStyled < Length());
1655                         if (cb.SetStyleAt(endStyled, styles[iPos], stylingMask)) {
1656                                 if (!didChange) {
1657                                         startMod = endStyled;
1658                                 }
1659                                 didChange = true;
1660                                 endMod = endStyled;
1661                         }
1662                 }
1663                 if (didChange) {
1664                         DocModification mh(SC_MOD_CHANGESTYLE | SC_PERFORMED_USER,
1665                                            startMod, endMod - startMod + 1);
1666                         NotifyModified(mh);
1667                 }
1668                 enteredStyling--;
1669                 return true;
1670         }
1671 }
1672
1673 void Document::EnsureStyledTo(int pos) {
1674         if ((enteredStyling == 0) && (pos > GetEndStyled())) {
1675                 IncrementStyleClock();
1676                 if (pli && !pli->UseContainerLexing()) {
1677                         int lineEndStyled = LineFromPosition(GetEndStyled());
1678                         int endStyledTo = LineStart(lineEndStyled);
1679                         pli->Colourise(endStyledTo, pos);
1680                 } else {
1681                         // Ask the watchers to style, and stop as soon as one responds.
1682                         for (int i = 0; pos > GetEndStyled() && i < lenWatchers; i++) {
1683                                 watchers[i].watcher->NotifyStyleNeeded(this, watchers[i].userData, pos);
1684                         }
1685                 }
1686         }
1687 }
1688
1689 void Document::LexerChanged() {
1690         // Tell the watchers the lexer has changed.
1691         for (int i = 0; i < lenWatchers; i++) {
1692                 watchers[i].watcher->NotifyLexerChanged(this, watchers[i].userData);
1693         }
1694 }
1695
1696 int SCI_METHOD Document::SetLineState(int line, int state) {
1697         int statePrevious = static_cast<LineState *>(perLineData[ldState])->SetLineState(line, state);
1698         if (state != statePrevious) {
1699                 DocModification mh(SC_MOD_CHANGELINESTATE, LineStart(line), 0, 0, 0, line);
1700                 NotifyModified(mh);
1701         }
1702         return statePrevious;
1703 }
1704
1705 int SCI_METHOD Document::GetLineState(int line) const {
1706         return static_cast<LineState *>(perLineData[ldState])->GetLineState(line);
1707 }
1708
1709 int Document::GetMaxLineState() {
1710         return static_cast<LineState *>(perLineData[ldState])->GetMaxLineState();
1711 }
1712
1713 void SCI_METHOD Document::ChangeLexerState(int start, int end) {
1714         DocModification mh(SC_MOD_LEXERSTATE, start, end-start, 0, 0, 0);
1715         NotifyModified(mh);
1716 }
1717
1718 StyledText Document::MarginStyledText(int line) {
1719         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldMargin]);
1720         return StyledText(pla->Length(line), pla->Text(line),
1721                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1722 }
1723
1724 void Document::MarginSetText(int line, const char *text) {
1725         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetText(line, text);
1726         DocModification mh(SC_MOD_CHANGEMARGIN, LineStart(line), 0, 0, 0, line);
1727         NotifyModified(mh);
1728 }
1729
1730 void Document::MarginSetStyle(int line, int style) {
1731         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyle(line, style);
1732 }
1733
1734 void Document::MarginSetStyles(int line, const unsigned char *styles) {
1735         static_cast<LineAnnotation *>(perLineData[ldMargin])->SetStyles(line, styles);
1736 }
1737
1738 int Document::MarginLength(int line) const {
1739         return static_cast<LineAnnotation *>(perLineData[ldMargin])->Length(line);
1740 }
1741
1742 void Document::MarginClearAll() {
1743         int maxEditorLine = LinesTotal();
1744         for (int l=0; l<maxEditorLine; l++)
1745                 MarginSetText(l, 0);
1746         // Free remaining data
1747         static_cast<LineAnnotation *>(perLineData[ldMargin])->ClearAll();
1748 }
1749
1750 bool Document::AnnotationAny() const {
1751         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->AnySet();
1752 }
1753
1754 StyledText Document::AnnotationStyledText(int line) {
1755         LineAnnotation *pla = static_cast<LineAnnotation *>(perLineData[ldAnnotation]);
1756         return StyledText(pla->Length(line), pla->Text(line),
1757                 pla->MultipleStyles(line), pla->Style(line), pla->Styles(line));
1758 }
1759
1760 void Document::AnnotationSetText(int line, const char *text) {
1761         const int linesBefore = AnnotationLines(line);
1762         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetText(line, text);
1763         const int linesAfter = AnnotationLines(line);
1764         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1765         mh.annotationLinesAdded = linesAfter - linesBefore;
1766         NotifyModified(mh);
1767 }
1768
1769 void Document::AnnotationSetStyle(int line, int style) {
1770         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyle(line, style);
1771         DocModification mh(SC_MOD_CHANGEANNOTATION, LineStart(line), 0, 0, 0, line);
1772         NotifyModified(mh);
1773 }
1774
1775 void Document::AnnotationSetStyles(int line, const unsigned char *styles) {
1776         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->SetStyles(line, styles);
1777 }
1778
1779 int Document::AnnotationLength(int line) const {
1780         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Length(line);
1781 }
1782
1783 int Document::AnnotationLines(int line) const {
1784         return static_cast<LineAnnotation *>(perLineData[ldAnnotation])->Lines(line);
1785 }
1786
1787 void Document::AnnotationClearAll() {
1788         int maxEditorLine = LinesTotal();
1789         for (int l=0; l<maxEditorLine; l++)
1790                 AnnotationSetText(l, 0);
1791         // Free remaining data
1792         static_cast<LineAnnotation *>(perLineData[ldAnnotation])->ClearAll();
1793 }
1794
1795 void Document::IncrementStyleClock() {
1796         styleClock = (styleClock + 1) % 0x100000;
1797 }
1798
1799 void SCI_METHOD Document::DecorationFillRange(int position, int value, int fillLength) {
1800         if (decorations.FillRange(position, value, fillLength)) {
1801                 DocModification mh(SC_MOD_CHANGEINDICATOR | SC_PERFORMED_USER,
1802                                                         position, fillLength);
1803                 NotifyModified(mh);
1804         }
1805 }
1806
1807 bool Document::AddWatcher(DocWatcher *watcher, void *userData) {
1808         for (int i = 0; i < lenWatchers; i++) {
1809                 if ((watchers[i].watcher == watcher) &&
1810                         (watchers[i].userData == userData))
1811                         return false;
1812         }
1813         WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers + 1];
1814         for (int j = 0; j < lenWatchers; j++)
1815                 pwNew[j] = watchers[j];
1816         pwNew[lenWatchers].watcher = watcher;
1817         pwNew[lenWatchers].userData = userData;
1818         delete []watchers;
1819         watchers = pwNew;
1820         lenWatchers++;
1821         return true;
1822 }
1823
1824 bool Document::RemoveWatcher(DocWatcher *watcher, void *userData) {
1825         for (int i = 0; i < lenWatchers; i++) {
1826                 if ((watchers[i].watcher == watcher) &&
1827                         (watchers[i].userData == userData)) {
1828                         if (lenWatchers == 1) {
1829                                 delete []watchers;
1830                                 watchers = 0;
1831                                 lenWatchers = 0;
1832                         } else {
1833                                 WatcherWithUserData *pwNew = new WatcherWithUserData[lenWatchers];
1834                                 for (int j = 0; j < lenWatchers - 1; j++) {
1835                                         pwNew[j] = (j < i) ? watchers[j] : watchers[j + 1];
1836                                 }
1837                                 delete []watchers;
1838                                 watchers = pwNew;
1839                                 lenWatchers--;
1840                         }
1841                         return true;
1842                 }
1843         }
1844         return false;
1845 }
1846
1847 void Document::NotifyModifyAttempt() {
1848         for (int i = 0; i < lenWatchers; i++) {
1849                 watchers[i].watcher->NotifyModifyAttempt(this, watchers[i].userData);
1850         }
1851 }
1852
1853 void Document::NotifySavePoint(bool atSavePoint) {
1854         for (int i = 0; i < lenWatchers; i++) {
1855                 watchers[i].watcher->NotifySavePoint(this, watchers[i].userData, atSavePoint);
1856         }
1857 }
1858
1859 void Document::NotifyModified(DocModification mh) {
1860         if (mh.modificationType & SC_MOD_INSERTTEXT) {
1861                 decorations.InsertSpace(mh.position, mh.length);
1862         } else if (mh.modificationType & SC_MOD_DELETETEXT) {
1863                 decorations.DeleteRange(mh.position, mh.length);
1864         }
1865         for (int i = 0; i < lenWatchers; i++) {
1866                 watchers[i].watcher->NotifyModified(this, mh, watchers[i].userData);
1867         }
1868 }
1869
1870 bool Document::IsWordPartSeparator(char ch) {
1871         return (WordCharClass(ch) == CharClassify::ccWord) && IsPunctuation(ch);
1872 }
1873
1874 int Document::WordPartLeft(int pos) {
1875         if (pos > 0) {
1876                 --pos;
1877                 char startChar = cb.CharAt(pos);
1878                 if (IsWordPartSeparator(startChar)) {
1879                         while (pos > 0 && IsWordPartSeparator(cb.CharAt(pos))) {
1880                                 --pos;
1881                         }
1882                 }
1883                 if (pos > 0) {
1884                         startChar = cb.CharAt(pos);
1885                         --pos;
1886                         if (IsLowerCase(startChar)) {
1887                                 while (pos > 0 && IsLowerCase(cb.CharAt(pos)))
1888                                         --pos;
1889                                 if (!IsUpperCase(cb.CharAt(pos)) && !IsLowerCase(cb.CharAt(pos)))
1890                                         ++pos;
1891                         } else if (IsUpperCase(startChar)) {
1892                                 while (pos > 0 && IsUpperCase(cb.CharAt(pos)))
1893                                         --pos;
1894                                 if (!IsUpperCase(cb.CharAt(pos)))
1895                                         ++pos;
1896                         } else if (IsADigit(startChar)) {
1897                                 while (pos > 0 && IsADigit(cb.CharAt(pos)))
1898                                         --pos;
1899                                 if (!IsADigit(cb.CharAt(pos)))
1900                                         ++pos;
1901                         } else if (IsPunctuation(startChar)) {
1902                                 while (pos > 0 && IsPunctuation(cb.CharAt(pos)))
1903                                         --pos;
1904                                 if (!IsPunctuation(cb.CharAt(pos)))
1905                                         ++pos;
1906                         } else if (isspacechar(startChar)) {
1907                                 while (pos > 0 && isspacechar(cb.CharAt(pos)))
1908                                         --pos;
1909                                 if (!isspacechar(cb.CharAt(pos)))
1910                                         ++pos;
1911                         } else if (!isascii(startChar)) {
1912                                 while (pos > 0 && !isascii(cb.CharAt(pos)))
1913                                         --pos;
1914                                 if (isascii(cb.CharAt(pos)))
1915                                         ++pos;
1916                         } else {
1917                                 ++pos;
1918                         }
1919                 }
1920         }
1921         return pos;
1922 }
1923
1924 int Document::WordPartRight(int pos) {
1925         char startChar = cb.CharAt(pos);
1926         int length = Length();
1927         if (IsWordPartSeparator(startChar)) {
1928                 while (pos < length && IsWordPartSeparator(cb.CharAt(pos)))
1929                         ++pos;
1930                 startChar = cb.CharAt(pos);
1931         }
1932         if (!isascii(startChar)) {
1933                 while (pos < length && !isascii(cb.CharAt(pos)))
1934                         ++pos;
1935         } else if (IsLowerCase(startChar)) {
1936                 while (pos < length && IsLowerCase(cb.CharAt(pos)))
1937                         ++pos;
1938         } else if (IsUpperCase(startChar)) {
1939                 if (IsLowerCase(cb.CharAt(pos + 1))) {
1940                         ++pos;
1941                         while (pos < length && IsLowerCase(cb.CharAt(pos)))
1942                                 ++pos;
1943                 } else {
1944                         while (pos < length && IsUpperCase(cb.CharAt(pos)))
1945                                 ++pos;
1946                 }
1947                 if (IsLowerCase(cb.CharAt(pos)) && IsUpperCase(cb.CharAt(pos - 1)))
1948                         --pos;
1949         } else if (IsADigit(startChar)) {
1950                 while (pos < length && IsADigit(cb.CharAt(pos)))
1951                         ++pos;
1952         } else if (IsPunctuation(startChar)) {
1953                 while (pos < length && IsPunctuation(cb.CharAt(pos)))
1954                         ++pos;
1955         } else if (isspacechar(startChar)) {
1956                 while (pos < length && isspacechar(cb.CharAt(pos)))
1957                         ++pos;
1958         } else {
1959                 ++pos;
1960         }
1961         return pos;
1962 }
1963
// True for line feed and carriage return.
bool IsLineEndChar(char c) {
	switch (c) {
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
1967
1968 int Document::ExtendStyleRange(int pos, int delta, bool singleLine) {
1969         int sStart = cb.StyleAt(pos);
1970         if (delta < 0) {
1971                 while (pos > 0 && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1972                         pos--;
1973                 pos++;
1974         } else {
1975                 while (pos < (Length()) && (cb.StyleAt(pos) == sStart) && (!singleLine || !IsLineEndChar(cb.CharAt(pos))))
1976                         pos++;
1977         }
1978         return pos;
1979 }
1980
// Return the brace that pairs with ch for (), [], {} and <>,
// or '\0' when ch is not one of those characters.
static char BraceOpposite(char ch) {
	// Partners sit next to each other: an opener at an even index, its closer after it
	static const char braces[] = "()[]{}<>";
	const int braceCount = 8;
	for (int i = 0; i < braceCount; i++) {
		if (braces[i] == ch)
			return braces[i ^ 1];
	}
	return '\0';
}
2003
2004 // TODO: should be able to extend styled region to find matching brace
2005 int Document::BraceMatch(int position, int /*maxReStyle*/) {
2006         char chBrace = CharAt(position);
2007         char chSeek = BraceOpposite(chBrace);
2008         if (chSeek == '\0')
2009                 return - 1;
2010         char styBrace = static_cast<char>(StyleAt(position) & stylingBitsMask);
2011         int direction = -1;
2012         if (chBrace == '(' || chBrace == '[' || chBrace == '{' || chBrace == '<')
2013                 direction = 1;
2014         int depth = 1;
2015         position = NextPosition(position, direction);
2016         while ((position >= 0) && (position < Length())) {
2017                 char chAtPos = CharAt(position);
2018                 char styAtPos = static_cast<char>(StyleAt(position) & stylingBitsMask);
2019                 if ((position > GetEndStyled()) || (styAtPos == styBrace)) {
2020                         if (chAtPos == chBrace)
2021                                 depth++;
2022                         if (chAtPos == chSeek)
2023                                 depth--;
2024                         if (depth == 0)
2025                                 return position;
2026                 }
2027                 int positionBeforeMove = position;
2028                 position = NextPosition(position, direction);
2029                 if (position == positionBeforeMove)
2030                         break;
2031         }
2032         return - 1;
2033 }
2034
2035 /**
2036  * Implementation of RegexSearchBase for the default built-in regular expression engine
2037  */
2038 class BuiltinRegex : public RegexSearchBase {
2039 public:
2040         BuiltinRegex(CharClassify *charClassTable) : search(charClassTable), substituted(NULL) {}
2041
2042         virtual ~BuiltinRegex() {
2043                 delete substituted;
2044         }
2045
2046         virtual long FindText(Document *doc, int minPos, int maxPos, const char *s,
2047                         bool caseSensitive, bool word, bool wordStart, int flags,
2048                         int *length);
2049
2050         virtual const char *SubstituteByPosition(Document *doc, const char *text, int *length);
2051
2052 private:
2053         RESearch search;
2054         char *substituted;
2055 };
2056
2057 // Define a way for the Regular Expression code to access the document
2058 class DocumentIndexer : public CharacterIndexer {
2059         Document *pdoc;
2060         int end;
2061 public:
2062         DocumentIndexer(Document *pdoc_, int end_) :
2063                 pdoc(pdoc_), end(end_) {
2064         }
2065
2066         virtual ~DocumentIndexer() {
2067         }
2068
2069         virtual char CharAt(int index) {
2070                 if (index < 0 || index >= end)
2071                         return 0;
2072                 else
2073                         return pdoc->CharAt(index);
2074         }
2075 };
2076
2077 long BuiltinRegex::FindText(Document *doc, int minPos, int maxPos, const char *s,
2078                         bool caseSensitive, bool, bool, int flags,
2079                         int *length) {
2080         bool posix = (flags & SCFIND_POSIX) != 0;
2081         int increment = (minPos <= maxPos) ? 1 : -1;
2082
2083         int startPos = minPos;
2084         int endPos = maxPos;
2085
2086         // Range endpoints should not be inside DBCS characters, but just in case, move them.
2087         startPos = doc->MovePositionOutsideChar(startPos, 1, false);
2088         endPos = doc->MovePositionOutsideChar(endPos, 1, false);
2089
2090         const char *errmsg = search.Compile(s, *length, caseSensitive, posix);
2091         if (errmsg) {
2092                 return -1;
2093         }
2094         // Find a variable in a property file: \$(\([A-Za-z0-9_.]+\))
2095         // Replace first '.' with '-' in each property file variable reference:
2096         //     Search: \$(\([A-Za-z0-9_-]+\)\.\([A-Za-z0-9_.]+\))
2097         //     Replace: $(\1-\2)
2098         int lineRangeStart = doc->LineFromPosition(startPos);
2099         int lineRangeEnd = doc->LineFromPosition(endPos);
2100         if ((increment == 1) &&
2101                 (startPos >= doc->LineEnd(lineRangeStart)) &&
2102                 (lineRangeStart < lineRangeEnd)) {
2103                 // the start position is at end of line or between line end characters.
2104                 lineRangeStart++;
2105                 startPos = doc->LineStart(lineRangeStart);
2106         } else if ((increment == -1) &&
2107                    (startPos <= doc->LineStart(lineRangeStart)) &&
2108                    (lineRangeStart > lineRangeEnd)) {
2109                 // the start position is at beginning of line.
2110                 lineRangeStart--;
2111                 startPos = doc->LineEnd(lineRangeStart);
2112         }
2113         int pos = -1;
2114         int lenRet = 0;
2115         char searchEnd = s[*length - 1];
2116         int lineRangeBreak = lineRangeEnd + increment;
2117         for (int line = lineRangeStart; line != lineRangeBreak; line += increment) {
2118                 int startOfLine = doc->LineStart(line);
2119                 int endOfLine = doc->LineEnd(line);
2120                 if (increment == 1) {
2121                         if (line == lineRangeStart) {
2122                                 if ((startPos != startOfLine) && (s[0] == '^'))
2123                                         continue;       // Can't match start of line if start position after start of line
2124                                 startOfLine = startPos;
2125                         }
2126                         if (line == lineRangeEnd) {
2127                                 if ((endPos != endOfLine) && (searchEnd == '$'))
2128                                         continue;       // Can't match end of line if end position before end of line
2129                                 endOfLine = endPos;
2130                         }
2131                 } else {
2132                         if (line == lineRangeEnd) {
2133                                 if ((endPos != startOfLine) && (s[0] == '^'))
2134                                         continue;       // Can't match start of line if end position after start of line
2135                                 startOfLine = endPos;
2136                         }
2137                         if (line == lineRangeStart) {
2138                                 if ((startPos != endOfLine) && (searchEnd == '$'))
2139                                         continue;       // Can't match end of line if start position before end of line
2140                                 endOfLine = startPos;
2141                         }
2142                 }
2143
2144                 DocumentIndexer di(doc, endOfLine);
2145                 int success = search.Execute(di, startOfLine, endOfLine);
2146                 if (success) {
2147                         pos = search.bopat[0];
2148                         lenRet = search.eopat[0] - search.bopat[0];
2149                         // There can be only one start of a line, so no need to look for last match in line
2150                         if ((increment == -1) && (s[0] != '^')) {
2151                                 // Check for the last match on this line.
2152                                 int repetitions = 1000; // Break out of infinite loop
2153                                 while (success && (search.eopat[0] <= endOfLine) && (repetitions--)) {
2154                                         success = search.Execute(di, pos+1, endOfLine);
2155                                         if (success) {
2156                                                 if (search.eopat[0] <= minPos) {
2157                                                         pos = search.bopat[0];
2158                                                         lenRet = search.eopat[0] - search.bopat[0];
2159                                                 } else {
2160                                                         success = 0;
2161                                                 }
2162                                         }
2163                                 }
2164                         }
2165                         break;
2166                 }
2167         }
2168         *length = lenRet;
2169         return pos;
2170 }
2171
2172 const char *BuiltinRegex::SubstituteByPosition(Document *doc, const char *text, int *length) {
2173         delete []substituted;
2174         substituted = 0;
2175         DocumentIndexer di(doc, doc->Length());
2176         if (!search.GrabMatches(di))
2177                 return 0;
2178         unsigned int lenResult = 0;
2179         for (int i = 0; i < *length; i++) {
2180                 if (text[i] == '\\') {
2181                         if (text[i + 1] >= '1' && text[i + 1] <= '9') {
2182                                 unsigned int patNum = text[i + 1] - '0';
2183                                 lenResult += search.eopat[patNum] - search.bopat[patNum];
2184                                 i++;
2185                         } else {
2186                                 switch (text[i + 1]) {
2187                                 case 'a':
2188                                 case 'b':
2189                                 case 'f':
2190                                 case 'n':
2191                                 case 'r':
2192                                 case 't':
2193                                 case 'v':
2194                                 case '\\':
2195                                         i++;
2196                                 }
2197                                 lenResult++;
2198                         }
2199                 } else {
2200                         lenResult++;
2201                 }
2202         }
2203         substituted = new char[lenResult + 1];
2204         char *o = substituted;
2205         for (int j = 0; j < *length; j++) {
2206                 if (text[j] == '\\') {
2207                         if (text[j + 1] >= '1' && text[j + 1] <= '9') {
2208                                 unsigned int patNum = text[j + 1] - '0';
2209                                 unsigned int len = search.eopat[patNum] - search.bopat[patNum];
2210                                 if (search.pat[patNum]) // Will be null if try for a match that did not occur
2211                                         memcpy(o, search.pat[patNum], len);
2212                                 o += len;
2213                                 j++;
2214                         } else {
2215                                 j++;
2216                                 switch (text[j]) {
2217                                 case 'a':
2218                                         *o++ = '\a';
2219                                         break;
2220                                 case 'b':
2221                                         *o++ = '\b';
2222                                         break;
2223                                 case 'f':
2224                                         *o++ = '\f';
2225                                         break;
2226                                 case 'n':
2227                                         *o++ = '\n';
2228                                         break;
2229                                 case 'r':
2230                                         *o++ = '\r';
2231                                         break;
2232                                 case 't':
2233                                         *o++ = '\t';
2234                                         break;
2235                                 case 'v':
2236                                         *o++ = '\v';
2237                                         break;
2238                                 case '\\':
2239                                         *o++ = '\\';
2240                                         break;
2241                                 default:
2242                                         *o++ = '\\';
2243                                         j--;
2244                                 }
2245                         }
2246                 } else {
2247                         *o++ = text[j];
2248                 }
2249         }
2250         *o = '\0';
2251         *length = lenResult;
2252         return substituted;
2253 }
2254
2255 #ifndef SCI_OWNREGEX
2256
2257 #ifdef SCI_NAMESPACE
2258
2259 RegexSearchBase *Scintilla::CreateRegexSearch(CharClassify *charClassTable) {
2260         return new BuiltinRegex(charClassTable);
2261 }
2262
2263 #else
2264
2265 RegexSearchBase *CreateRegexSearch(CharClassify *charClassTable) {
2266         return new BuiltinRegex(charClassTable);
2267 }
2268
2269 #endif
2270
2271 #endif