scintilla/lexers/LexRust.cxx

   1 /** @file LexRust.cxx
   2  ** Lexer for Rust.
   3  **
   4  ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
   5  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   6  **/
   7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <string>
  18 #include <map>
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22 #include "SciLexer.h"
  23
  24 #include "PropSetSimple.h"
  25 #include "WordList.h"
  26 #include "LexAccessor.h"
  27 #include "Accessor.h"
  28 #include "StyleContext.h"
  29 #include "CharacterSet.h"
  30 #include "LexerModule.h"
  31 #include "OptionSet.h"
  32
  33 #ifdef SCI_NAMESPACE
  34 using namespace Scintilla;
  35 #endif
  36
  37 static const int NUM_RUST_KEYWORD_LISTS = 7;
  38 static const int MAX_RUST_IDENT_CHARS = 1023;
  39
  40 static bool IsStreamCommentStyle(int style) {
  41         return style == SCE_RUST_COMMENTBLOCK ||
  42                    style == SCE_RUST_COMMENTBLOCKDOC;
  43 }
  44
  45 // Options used for LexerRust
  46 struct OptionsRust {
  47         bool fold;
  48         bool foldSyntaxBased;
  49         bool foldComment;
  50         bool foldCommentMultiline;
  51         bool foldCommentExplicit;
  52         std::string foldExplicitStart;
  53         std::string foldExplicitEnd;
  54         bool foldExplicitAnywhere;
  55         bool foldCompact;
  56         int  foldAtElseInt;
  57         bool foldAtElse;
  58         OptionsRust() {
  59                 fold = false;
  60                 foldSyntaxBased = true;
  61                 foldComment = false;
  62                 foldCommentMultiline = true;
  63                 foldCommentExplicit = true;
  64                 foldExplicitStart = "";
  65                 foldExplicitEnd   = "";
  66                 foldExplicitAnywhere = false;
  67                 foldCompact = true;
  68                 foldAtElseInt = -1;
  69                 foldAtElse = false;
  70         }
  71 };
  72
  73 static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
  74                         "Primary keywords and identifiers",
  75                         "Built in types",
  76                         "Other keywords",
  77                         "Keywords 4",
  78                         "Keywords 5",
  79                         "Keywords 6",
  80                         "Keywords 7",
  81                         0,
  82                 };
  83
  84 struct OptionSetRust : public OptionSet<OptionsRust> {
  85         OptionSetRust() {
  86                 DefineProperty("fold", &OptionsRust::fold);
  87
  88                 DefineProperty("fold.comment", &OptionsRust::foldComment);
  89
  90                 DefineProperty("fold.compact", &OptionsRust::foldCompact);
  91
  92                 DefineProperty("fold.at.else", &OptionsRust::foldAtElse);
  93
  94                 DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
  95                         "Set this property to 0 to disable syntax based folding.");
  96
  97                 DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
  98                         "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
  99
 100                 DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
 101                         "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
 102
 103                 DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
 104                         "The string to use for explicit fold start points, replacing the standard //{.");
 105
 106                 DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
 107                         "The string to use for explicit fold end points, replacing the standard //}.");
 108
 109                 DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
 110                         "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
 111
 112                 DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
 113                         "This option enables Rust folding on a \"} else {\" line of an if statement.");
 114
 115                 DefineWordListSets(rustWordLists);
 116         }
 117 };
 118
 119 class LexerRust : public ILexer {
 120         WordList keywords[NUM_RUST_KEYWORD_LISTS];
 121         OptionsRust options;
 122         OptionSetRust osRust;
 123 public:
 124         virtual ~LexerRust() {
 125         }
 126         void SCI_METHOD Release() override {
 127                 delete this;
 128         }
 129         int SCI_METHOD Version() const override {
 130                 return lvOriginal;
 131         }
 132         const char * SCI_METHOD PropertyNames() override {
 133                 return osRust.PropertyNames();
 134         }
 135         int SCI_METHOD PropertyType(const char *name) override {
 136                 return osRust.PropertyType(name);
 137         }
 138         const char * SCI_METHOD DescribeProperty(const char *name) override {
 139                 return osRust.DescribeProperty(name);
 140         }
 141         Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
 142         const char * SCI_METHOD DescribeWordListSets() override {
 143                 return osRust.DescribeWordListSets();
 144         }
 145         Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
 146         void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 147         void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 148         void * SCI_METHOD PrivateCall(int, void *) override {
 149                 return 0;
 150         }
 151         static ILexer *LexerFactoryRust() {
 152                 return new LexerRust();
 153         }
 154 };
 155
 156 Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
 157         if (osRust.PropertySet(&options, key, val)) {
 158                 return 0;
 159         }
 160         return -1;
 161 }
 162
 163 Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
 164         Sci_Position firstModification = -1;
 165         if (n < NUM_RUST_KEYWORD_LISTS) {
 166                 WordList *wordListN = &keywords[n];
 167                 WordList wlNew;
 168                 wlNew.Set(wl);
 169                 if (*wordListN != wlNew) {
 170                         wordListN->Set(wl);
 171                         firstModification = 0;
 172                 }
 173         }
 174         return firstModification;
 175 }
 176
 177 static bool IsWhitespace(int c) {
 178     return c == ' ' || c == '\t' || c == '\r' || c == '\n';
 179 }
 180
 181 /* This isn't quite right for Unicode identifiers */
 182 static bool IsIdentifierStart(int ch) {
 183         return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
 184 }
 185
 186 /* This isn't quite right for Unicode identifiers */
 187 static bool IsIdentifierContinue(int ch) {
 188         return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
 189 }
 190
 191 static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
 192         while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
 193                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 194                         styler.SetLineState(styler.GetLine(pos), 0);
 195                 pos++;
 196         }
 197         styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
 198 }
 199
 200 static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
 201         for (Sci_Position ii = 0; ii < len; ii++)
 202                 s[ii] = styler[ii + start];
 203         s[len] = '\0';
 204 }
 205
 206 static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
 207         Sci_Position start = pos;
 208         while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
 209                 pos++;
 210
 211         if (styler.SafeGetCharAt(pos, '\0') == '!') {
 212                 pos++;
 213                 styler.ColourTo(pos - 1, SCE_RUST_MACRO);
 214         } else {
 215                 char s[MAX_RUST_IDENT_CHARS + 1];
 216                 int len = pos - start;
 217                 len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
 218                 GrabString(s, styler, start, len);
 219                 bool keyword = false;
 220                 for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
 221                         if (keywords[ii].InList(s)) {
 222                                 styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
 223                                 keyword = true;
 224                                 break;
 225                         }
 226                 }
 227                 if (!keyword) {
 228                         styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
 229                 }
 230         }
 231 }
 232
 233 /* Scans a sequence of digits, returning true if it found any. */
 234 static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
 235         Sci_Position old_pos = pos;
 236         for (;;) {
 237                 int c = styler.SafeGetCharAt(pos, '\0');
 238                 if (IsADigit(c, base) || c == '_')
 239                         pos++;
 240                 else
 241                         break;
 242         }
 243         return old_pos != pos;
 244 }
 245
 246 /* Scans an integer and floating point literals. */
 247 static void ScanNumber(Accessor& styler, Sci_Position& pos) {
 248         int base = 10;
 249         int c = styler.SafeGetCharAt(pos, '\0');
 250         int n = styler.SafeGetCharAt(pos + 1, '\0');
 251         bool error = false;
 252         /* Scan the prefix, thus determining the base.
 253          * 10 is default if there's no prefix. */
 254         if (c == '0' && n == 'x') {
 255                 pos += 2;
 256                 base = 16;
 257         } else if (c == '0' && n == 'b') {
 258                 pos += 2;
 259                 base = 2;
 260         } else if (c == '0' && n == 'o') {
 261                 pos += 2;
 262                 base = 8;
 263         }
 264
 265         /* Scan initial digits. The literal is malformed if there are none. */
 266         error |= !ScanDigits(styler, pos, base);
 267         /* See if there's an integer suffix. We mimic the Rust's lexer
 268          * and munch it even if there was an error above. */
 269         c = styler.SafeGetCharAt(pos, '\0');
 270         if (c == 'u' || c == 'i') {
 271                 pos++;
 272                 c = styler.SafeGetCharAt(pos, '\0');
 273                 n = styler.SafeGetCharAt(pos + 1, '\0');
 274                 if (c == '8' || c == 's') {
 275                         pos++;
 276                 } else if (c == '1' && n == '6') {
 277                         pos += 2;
 278                 } else if (c == '3' && n == '2') {
 279                         pos += 2;
 280                 } else if (c == '6' && n == '4') {
 281                         pos += 2;
 282                 } else {
 283                         error = true;
 284                 }
 285         /* See if it's a floating point literal. These literals have to be base 10.
 286          */
 287         } else if (!error) {
 288                 /* If there's a period, it's a floating point literal unless it's
 289                  * followed by an identifier (meaning this is a method call, e.g.
 290                  * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
 291                  */
 292                 n = styler.SafeGetCharAt(pos + 1, '\0');
 293                 if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
 294                         error |= base != 10;
 295                         pos++;
 296                         /* It's ok to have no digits after the period. */
 297                         ScanDigits(styler, pos, 10);
 298                 }
 299
 300                 /* Look for the exponentiation. */
 301                 c = styler.SafeGetCharAt(pos, '\0');
 302                 if (c == 'e' || c == 'E') {
 303                         error |= base != 10;
 304                         pos++;
 305                         c = styler.SafeGetCharAt(pos, '\0');
 306                         if (c == '-' || c == '+')
 307                                 pos++;
 308                         /* It is invalid to have no digits in the exponent. */
 309                         error |= !ScanDigits(styler, pos, 10);
 310                 }
 311
 312                 /* Scan the floating point suffix. */
 313                 c = styler.SafeGetCharAt(pos, '\0');
 314                 if (c == 'f') {
 315                         error |= base != 10;
 316                         pos++;
 317                         c = styler.SafeGetCharAt(pos, '\0');
 318                         n = styler.SafeGetCharAt(pos + 1, '\0');
 319                         if (c == '3' && n == '2') {
 320                                 pos += 2;
 321                         } else if (c == '6' && n == '4') {
 322                                 pos += 2;
 323                         } else {
 324                                 error = true;
 325                         }
 326                 }
 327         }
 328
 329         if (error)
 330                 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 331         else
 332                 styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
 333 }
 334
 335 static bool IsOneCharOperator(int c) {
 336         return c == ';' || c == ',' || c == '(' || c == ')'
 337             || c == '{' || c == '}' || c == '[' || c == ']'
 338             || c == '@' || c == '#' || c == '~' || c == '+'
 339             || c == '*' || c == '/' || c == '^' || c == '%'
 340             || c == '.' || c == ':' || c == '!' || c == '<'
 341             || c == '>' || c == '=' || c == '-' || c == '&'
 342             || c == '|' || c == '$' || c == '?';
 343 }
 344
 345 static bool IsTwoCharOperator(int c, int n) {
 346         return (c == '.' && n == '.') || (c == ':' && n == ':')
 347             || (c == '!' && n == '=') || (c == '<' && n == '<')
 348             || (c == '<' && n == '=') || (c == '>' && n == '>')
 349             || (c == '>' && n == '=') || (c == '=' && n == '=')
 350             || (c == '=' && n == '>') || (c == '-' && n == '>')
 351             || (c == '&' && n == '&') || (c == '|' && n == '|')
 352             || (c == '-' && n == '=') || (c == '&' && n == '=')
 353             || (c == '|' && n == '=') || (c == '+' && n == '=')
 354             || (c == '*' && n == '=') || (c == '/' && n == '=')
 355             || (c == '^' && n == '=') || (c == '%' && n == '=');
 356 }
 357
 358 static bool IsThreeCharOperator(int c, int n, int n2) {
 359         return (c == '<' && n == '<' && n2 == '=')
 360             || (c == '>' && n == '>' && n2 == '=');
 361 }
 362
 363 static bool IsValidCharacterEscape(int c) {
 364         return c == 'n'  || c == 'r' || c == 't' || c == '\\'
 365             || c == '\'' || c == '"' || c == '0';
 366 }
 367
 368 static bool IsValidStringEscape(int c) {
 369         return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
 370 }
 371
 372 static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
 373         for (;;) {
 374                 int c = styler.SafeGetCharAt(pos, '\0');
 375                 if (!IsADigit(c, 16))
 376                         break;
 377                 num_digits--;
 378                 pos++;
 379                 if (num_digits == 0 && stop_asap)
 380                         return true;
 381         }
 382         if (num_digits == 0) {
 383                 return true;
 384         } else {
 385                 return false;
 386         }
 387 }
 388
 389 /* This is overly permissive for character literals in order to accept UTF-8 encoded
 390  * character literals. */
 391 static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
 392         pos++;
 393         int c = styler.SafeGetCharAt(pos, '\0');
 394         int n = styler.SafeGetCharAt(pos + 1, '\0');
 395         bool done = false;
 396         bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
 397         bool valid_char = true;
 398         bool first = true;
 399         while (!done) {
 400                 switch (c) {
 401                         case '\\':
 402                                 done = true;
 403                                 if (IsValidCharacterEscape(n)) {
 404                                         pos += 2;
 405                                 } else if (n == 'x') {
 406                                         pos += 2;
 407                                         valid_char = ScanNumericEscape(styler, pos, 2, false);
 408                                 } else if (n == 'u' && !ascii_only) {
 409                                         pos += 2;
 410                                         if (styler.SafeGetCharAt(pos, '\0') != '{') {
 411                                                 // old-style
 412                                                 valid_char = ScanNumericEscape(styler, pos, 4, false);
 413                                         } else {
 414                                                 int n_digits = 0;
 415                                                 while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
 416                                                 }
 417                                                 if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
 418                                                         pos++;
 419                                                 else
 420                                                         valid_char = false;
 421                                         }
 422                                 } else if (n == 'U' && !ascii_only) {
 423                                         pos += 2;
 424                                         valid_char = ScanNumericEscape(styler, pos, 8, false);
 425                                 } else {
 426                                         valid_char = false;
 427                                 }
 428                                 break;
 429                         case '\'':
 430                                 valid_char = !first;
 431                                 done = true;
 432                                 break;
 433                         case '\t':
 434                         case '\n':
 435                         case '\r':
 436                         case '\0':
 437                                 valid_char = false;
 438                                 done = true;
 439                                 break;
 440                         default:
 441                                 if (ascii_only && !IsASCII((char)c)) {
 442                                         done = true;
 443                                         valid_char = false;
 444                                 } else if (!IsIdentifierContinue(c) && !first) {
 445                                         done = true;
 446                                 } else {
 447                                         pos++;
 448                                 }
 449                                 break;
 450                 }
 451                 c = styler.SafeGetCharAt(pos, '\0');
 452                 n = styler.SafeGetCharAt(pos + 1, '\0');
 453
 454                 first = false;
 455         }
 456         if (styler.SafeGetCharAt(pos, '\0') == '\'') {
 457                 valid_lifetime = false;
 458         } else {
 459                 valid_char = false;
 460         }
 461         if (valid_lifetime) {
 462                 styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
 463         } else if (valid_char) {
 464                 pos++;
 465                 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
 466         } else {
 467                 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 468         }
 469 }
 470
 471 enum CommentState {
 472         UnknownComment,
 473         DocComment,
 474         NotDocComment
 475 };
 476
 477 /*
 478  * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
 479  * Otherwise it's a regular comment.
 480  */
 481 static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
 482         int c = styler.SafeGetCharAt(pos, '\0');
 483         bool maybe_doc_comment = false;
 484         if (c == '*') {
 485                 int n = styler.SafeGetCharAt(pos + 1, '\0');
 486                 if (n != '*' && n != '/') {
 487                         maybe_doc_comment = true;
 488                 }
 489         } else if (c == '!') {
 490                 maybe_doc_comment = true;
 491         }
 492
 493         for (;;) {
 494                 int n = styler.SafeGetCharAt(pos + 1, '\0');
 495                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 496                         styler.SetLineState(styler.GetLine(pos), level);
 497                 if (c == '*') {
 498                         pos++;
 499                         if (n == '/') {
 500                                 pos++;
 501                                 level--;
 502                                 if (level == 0) {
 503                                         styler.SetLineState(styler.GetLine(pos), 0);
 504                                         if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
 505                                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
 506                                         else
 507                                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
 508                                         break;
 509                                 }
 510                         }
 511                 } else if (c == '/') {
 512                         pos++;
 513                         if (n == '*') {
 514                                 pos++;
 515                                 level++;
 516                         }
 517                 }
 518                 else {
 519                         pos++;
 520                 }
 521                 if (pos >= max) {
 522                         if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
 523                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
 524                         else
 525                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
 526                         break;
 527                 }
 528                 c = styler.SafeGetCharAt(pos, '\0');
 529         }
 530 }
 531
 532 /*
 533  * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
 534  * Otherwise it's a normal line comment.
 535  */
 536 static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
 537         bool maybe_doc_comment = false;
 538         int c = styler.SafeGetCharAt(pos, '\0');
 539         if (c == '/') {
 540                 if (pos < max) {
 541                         pos++;
 542                         c = styler.SafeGetCharAt(pos, '\0');
 543                         if (c != '/') {
 544                                 maybe_doc_comment = true;
 545                         }
 546                 }
 547         } else if (c == '!') {
 548                 maybe_doc_comment = true;
 549         }
 550
 551         while (pos < max && c != '\n') {
 552                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 553                         styler.SetLineState(styler.GetLine(pos), 0);
 554                 pos++;
 555                 c = styler.SafeGetCharAt(pos, '\0');
 556         }
 557
 558         if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
 559                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
 560         else
 561                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
 562 }
 563
 564 static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
 565         pos++;
 566         int c = styler.SafeGetCharAt(pos, '\0');
 567         pos++;
 568         if (c == '/')
 569                 ResumeLineComment(styler, pos, max, UnknownComment);
 570         else if (c == '*')
 571                 ResumeBlockComment(styler, pos, max, UnknownComment, 1);
 572 }
 573
 574 static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
 575         int c = styler.SafeGetCharAt(pos, '\0');
 576         bool error = false;
 577         while (c != '"' && !error) {
 578                 if (pos >= max) {
 579                         error = true;
 580                         break;
 581                 }
 582                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 583                         styler.SetLineState(styler.GetLine(pos), 0);
 584                 if (c == '\\') {
 585                         int n = styler.SafeGetCharAt(pos + 1, '\0');
 586                         if (IsValidStringEscape(n)) {
 587                                 pos += 2;
 588                         } else if (n == 'x') {
 589                                 pos += 2;
 590                                 error = !ScanNumericEscape(styler, pos, 2, true);
 591                         } else if (n == 'u' && !ascii_only) {
 592                                 pos += 2;
 593                                 if (styler.SafeGetCharAt(pos, '\0') != '{') {
 594                                         // old-style
 595                                         error = !ScanNumericEscape(styler, pos, 4, true);
 596                                 } else {
 597                                         int n_digits = 0;
 598                                         while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
 599                                         }
 600                                         if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
 601                                                 pos++;
 602                                         else
 603                                                 error = true;
 604                                 }
 605                         } else if (n == 'U' && !ascii_only) {
 606                                 pos += 2;
 607                                 error = !ScanNumericEscape(styler, pos, 8, true);
 608                         } else {
 609                                 pos += 1;
 610                                 error = true;
 611                         }
 612                 } else {
 613                         if (ascii_only && !IsASCII((char)c))
 614                                 error = true;
 615                         else
 616                                 pos++;
 617                 }
 618                 c = styler.SafeGetCharAt(pos, '\0');
 619         }
 620         if (!error)
 621                 pos++;
 622         styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
 623 }
 624
 625 static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
 626         for (;;) {
 627                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 628                         styler.SetLineState(styler.GetLine(pos), num_hashes);
 629
 630                 int c = styler.SafeGetCharAt(pos, '\0');
 631                 if (c == '"') {
 632                         pos++;
 633                         int trailing_num_hashes = 0;
 634                         while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
 635                                 trailing_num_hashes++;
 636                                 pos++;
 637                         }
 638                         if (trailing_num_hashes == num_hashes) {
 639                                 styler.SetLineState(styler.GetLine(pos), 0);
 640                                 break;
 641                         }
 642                 } else if (pos >= max) {
 643                         break;
 644                 } else {
 645                         if (ascii_only && !IsASCII((char)c))
 646                                 break;
 647                         pos++;
 648                 }
 649         }
 650         styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
 651 }
 652
 653 static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
 654         pos++;
 655         int num_hashes = 0;
 656         while (styler.SafeGetCharAt(pos, '\0') == '#') {
 657                 num_hashes++;
 658                 pos++;
 659         }
 660         if (styler.SafeGetCharAt(pos, '\0') != '"') {
 661                 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 662         } else {
 663                 pos++;
 664                 ResumeRawString(styler, pos, max, num_hashes, ascii_only);
 665         }
 666 }
 667
 668 void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
 669         PropSetSimple props;
 670         Accessor styler(pAccess, &props);
 671         Sci_Position pos = startPos;
 672         Sci_Position max = pos + length;
 673
 674         styler.StartAt(pos);
 675         styler.StartSegment(pos);
 676
 677         if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
 678                 ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
 679         } else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
 680                 ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
 681         } else if (initStyle == SCE_RUST_STRING) {
 682                 ResumeString(styler, pos, max, false);
 683         } else if (initStyle == SCE_RUST_BYTESTRING) {
 684                 ResumeString(styler, pos, max, true);
 685         } else if (initStyle == SCE_RUST_STRINGR) {
 686                 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
 687         } else if (initStyle == SCE_RUST_BYTESTRINGR) {
 688                 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
 689         }
 690
 691         while (pos < max) {
 692                 int c = styler.SafeGetCharAt(pos, '\0');
 693                 int n = styler.SafeGetCharAt(pos + 1, '\0');
 694                 int n2 = styler.SafeGetCharAt(pos + 2, '\0');
 695
 696                 if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
 697                         pos += 2;
 698                         ResumeLineComment(styler, pos, max, NotDocComment);
 699                 } else if (IsWhitespace(c)) {
 700                         ScanWhitespace(styler, pos, max);
 701                 } else if (c == '/' && (n == '/' || n == '*')) {
 702                         ScanComments(styler, pos, max);
 703                 } else if (c == 'r' && (n == '#' || n == '"')) {
 704                         ScanRawString(styler, pos, max, false);
 705                 } else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
 706                         pos++;
 707                         ScanRawString(styler, pos, max, true);
 708                 } else if (c == 'b' && n == '"') {
 709                         pos += 2;
 710                         ResumeString(styler, pos, max, true);
 711                 } else if (c == 'b' && n == '\'') {
 712                         pos++;
 713                         ScanCharacterLiteralOrLifetime(styler, pos, true);
 714                 } else if (IsIdentifierStart(c)) {
 715                         ScanIdentifier(styler, pos, keywords);
 716                 } else if (IsADigit(c)) {
 717                         ScanNumber(styler, pos);
 718                 } else if (IsThreeCharOperator(c, n, n2)) {
 719                         pos += 3;
 720                         styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
 721                 } else if (IsTwoCharOperator(c, n)) {
 722                         pos += 2;
 723                         styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
 724                 } else if (IsOneCharOperator(c)) {
 725                         pos++;
 726                         styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
 727                 } else if (c == '\'') {
 728                         ScanCharacterLiteralOrLifetime(styler, pos, false);
 729                 } else if (c == '"') {
 730                         pos++;
 731                         ResumeString(styler, pos, max, false);
 732                 } else {
 733                         pos++;
 734                         styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 735                 }
 736         }
 737         styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
 738         styler.Flush();
 739 }
 740
 741 void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
 742
 743         if (!options.fold)
 744                 return;
 745
 746         LexAccessor styler(pAccess);
 747
 748         Sci_PositionU endPos = startPos + length;
 749         int visibleChars = 0;
 750         bool inLineComment = false;
 751         Sci_Position lineCurrent = styler.GetLine(startPos);
 752         int levelCurrent = SC_FOLDLEVELBASE;
 753         if (lineCurrent > 0)
 754                 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
 755         Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
 756         int levelMinCurrent = levelCurrent;
 757         int levelNext = levelCurrent;
 758         char chNext = styler[startPos];
 759         int styleNext = styler.StyleAt(startPos);
 760         int style = initStyle;
 761         const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
 762         for (Sci_PositionU i = startPos; i < endPos; i++) {
 763                 char ch = chNext;
 764                 chNext = styler.SafeGetCharAt(i + 1);
 765                 int stylePrev = style;
 766                 style = styleNext;
 767                 styleNext = styler.StyleAt(i + 1);
 768                 bool atEOL = i == (lineStartNext-1);
 769                 if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
 770                         inLineComment = true;
 771                 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
 772                         if (!IsStreamCommentStyle(stylePrev)) {
 773                                 levelNext++;
 774                         } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
 775                                 // Comments don't end at end of line and the next character may be unstyled.
 776                                 levelNext--;
 777                         }
 778                 }
 779                 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
 780                         if (userDefinedFoldMarkers) {
 781                                 if (styler.Match(i, options.foldExplicitStart.c_str())) {
 782                                         levelNext++;
 783                                 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
 784                                         levelNext--;
 785                                 }
 786                         } else {
 787                                 if ((ch == '/') && (chNext == '/')) {
 788                                         char chNext2 = styler.SafeGetCharAt(i + 2);
 789                                         if (chNext2 == '{') {
 790                                                 levelNext++;
 791                                         } else if (chNext2 == '}') {
 792                                                 levelNext--;
 793                                         }
 794                                 }
 795                         }
 796                 }
 797                 if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
 798                         if (ch == '{') {
 799                                 // Measure the minimum before a '{' to allow
 800                                 // folding on "} else {"
 801                                 if (levelMinCurrent > levelNext) {
 802                                         levelMinCurrent = levelNext;
 803                                 }
 804                                 levelNext++;
 805                         } else if (ch == '}') {
 806                                 levelNext--;
 807                         }
 808                 }
 809                 if (!IsASpace(ch))
 810                         visibleChars++;
 811                 if (atEOL || (i == endPos-1)) {
 812                         int levelUse = levelCurrent;
 813                         if (options.foldSyntaxBased && options.foldAtElse) {
 814                                 levelUse = levelMinCurrent;
 815                         }
 816                         int lev = levelUse | levelNext << 16;
 817                         if (visibleChars == 0 && options.foldCompact)
 818                                 lev |= SC_FOLDLEVELWHITEFLAG;
 819                         if (levelUse < levelNext)
 820                                 lev |= SC_FOLDLEVELHEADERFLAG;
 821                         if (lev != styler.LevelAt(lineCurrent)) {
 822                                 styler.SetLevel(lineCurrent, lev);
 823                         }
 824                         lineCurrent++;
 825                         lineStartNext = styler.LineStart(lineCurrent+1);
 826                         levelCurrent = levelNext;
 827                         levelMinCurrent = levelCurrent;
 828                         if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
 829                                 // There is an empty line at end of file so give it same level and empty
 830                                 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
 831                         }
 832                         visibleChars = 0;
 833                         inLineComment = false;
 834                 }
 835         }
 836 }
 837
// Global LexerModule object for Rust: constructed with the SCLEX_RUST
// language id, the LexerRust::LexerFactoryRust factory function, the
// language name "rust", and rustWordLists (presumably the keyword list
// descriptions; defined outside this section).
LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);