ext/scintilla/lexers/LexRust.cxx

   1 /** @file LexRust.cxx
   2  ** Lexer for Rust.
   3  **
   4  ** Copyright (c) 2013 by SiegeLord <slabode@aim.com>
   5  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
   6  **/
   7 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
   8 // The License.txt file describes the conditions under which this software may be distributed.
   9
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <stdarg.h>
  14 #include <assert.h>
  15 #include <ctype.h>
  16
  17 #include <string>
  18 #include <map>
  19
  20 #include "ILexer.h"
  21 #include "Scintilla.h"
  22 #include "SciLexer.h"
  23
  24 #include "PropSetSimple.h"
  25 #include "WordList.h"
  26 #include "LexAccessor.h"
  27 #include "Accessor.h"
  28 #include "StyleContext.h"
  29 #include "CharacterSet.h"
  30 #include "LexerModule.h"
  31 #include "OptionSet.h"
  32 #include "DefaultLexer.h"
  33
  34 using namespace Scintilla;
  35
  36 static const int NUM_RUST_KEYWORD_LISTS = 7;
  37 static const int MAX_RUST_IDENT_CHARS = 1023;
  38
  39 static bool IsStreamCommentStyle(int style) {
  40         return style == SCE_RUST_COMMENTBLOCK ||
  41                    style == SCE_RUST_COMMENTBLOCKDOC;
  42 }
  43
  44 // Options used for LexerRust
  45 struct OptionsRust {
  46         bool fold;
  47         bool foldSyntaxBased;
  48         bool foldComment;
  49         bool foldCommentMultiline;
  50         bool foldCommentExplicit;
  51         std::string foldExplicitStart;
  52         std::string foldExplicitEnd;
  53         bool foldExplicitAnywhere;
  54         bool foldCompact;
  55         int  foldAtElseInt;
  56         bool foldAtElse;
  57         OptionsRust() {
  58                 fold = false;
  59                 foldSyntaxBased = true;
  60                 foldComment = false;
  61                 foldCommentMultiline = true;
  62                 foldCommentExplicit = true;
  63                 foldExplicitStart = "";
  64                 foldExplicitEnd   = "";
  65                 foldExplicitAnywhere = false;
  66                 foldCompact = true;
  67                 foldAtElseInt = -1;
  68                 foldAtElse = false;
  69         }
  70 };
  71
  72 static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
  73                         "Primary keywords and identifiers",
  74                         "Built in types",
  75                         "Other keywords",
  76                         "Keywords 4",
  77                         "Keywords 5",
  78                         "Keywords 6",
  79                         "Keywords 7",
  80                         0,
  81                 };
  82
  83 struct OptionSetRust : public OptionSet<OptionsRust> {
  84         OptionSetRust() {
  85                 DefineProperty("fold", &OptionsRust::fold);
  86
  87                 DefineProperty("fold.comment", &OptionsRust::foldComment);
  88
  89                 DefineProperty("fold.compact", &OptionsRust::foldCompact);
  90
  91                 DefineProperty("fold.at.else", &OptionsRust::foldAtElse);
  92
  93                 DefineProperty("fold.rust.syntax.based", &OptionsRust::foldSyntaxBased,
  94                         "Set this property to 0 to disable syntax based folding.");
  95
  96                 DefineProperty("fold.rust.comment.multiline", &OptionsRust::foldCommentMultiline,
  97                         "Set this property to 0 to disable folding multi-line comments when fold.comment=1.");
  98
  99                 DefineProperty("fold.rust.comment.explicit", &OptionsRust::foldCommentExplicit,
 100                         "Set this property to 0 to disable folding explicit fold points when fold.comment=1.");
 101
 102                 DefineProperty("fold.rust.explicit.start", &OptionsRust::foldExplicitStart,
 103                         "The string to use for explicit fold start points, replacing the standard //{.");
 104
 105                 DefineProperty("fold.rust.explicit.end", &OptionsRust::foldExplicitEnd,
 106                         "The string to use for explicit fold end points, replacing the standard //}.");
 107
 108                 DefineProperty("fold.rust.explicit.anywhere", &OptionsRust::foldExplicitAnywhere,
 109                         "Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");
 110
 111                 DefineProperty("lexer.rust.fold.at.else", &OptionsRust::foldAtElseInt,
 112                         "This option enables Rust folding on a \"} else {\" line of an if statement.");
 113
 114                 DefineWordListSets(rustWordLists);
 115         }
 116 };
 117
 118 class LexerRust : public DefaultLexer {
 119         WordList keywords[NUM_RUST_KEYWORD_LISTS];
 120         OptionsRust options;
 121         OptionSetRust osRust;
 122 public:
 123         virtual ~LexerRust() {
 124         }
 125         void SCI_METHOD Release() override {
 126                 delete this;
 127         }
 128         int SCI_METHOD Version() const override {
 129                 return lvRelease4;
 130         }
 131         const char * SCI_METHOD PropertyNames() override {
 132                 return osRust.PropertyNames();
 133         }
 134         int SCI_METHOD PropertyType(const char *name) override {
 135                 return osRust.PropertyType(name);
 136         }
 137         const char * SCI_METHOD DescribeProperty(const char *name) override {
 138                 return osRust.DescribeProperty(name);
 139         }
 140         Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
 141         const char * SCI_METHOD DescribeWordListSets() override {
 142                 return osRust.DescribeWordListSets();
 143         }
 144         Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
 145         void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 146         void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 147         void * SCI_METHOD PrivateCall(int, void *) override {
 148                 return 0;
 149         }
 150         static ILexer4 *LexerFactoryRust() {
 151                 return new LexerRust();
 152         }
 153 };
 154
 155 Sci_Position SCI_METHOD LexerRust::PropertySet(const char *key, const char *val) {
 156         if (osRust.PropertySet(&options, key, val)) {
 157                 return 0;
 158         }
 159         return -1;
 160 }
 161
 162 Sci_Position SCI_METHOD LexerRust::WordListSet(int n, const char *wl) {
 163         Sci_Position firstModification = -1;
 164         if (n < NUM_RUST_KEYWORD_LISTS) {
 165                 WordList *wordListN = &keywords[n];
 166                 WordList wlNew;
 167                 wlNew.Set(wl);
 168                 if (*wordListN != wlNew) {
 169                         wordListN->Set(wl);
 170                         firstModification = 0;
 171                 }
 172         }
 173         return firstModification;
 174 }
 175
 176 static bool IsWhitespace(int c) {
 177     return c == ' ' || c == '\t' || c == '\r' || c == '\n';
 178 }
 179
 180 /* This isn't quite right for Unicode identifiers */
 181 static bool IsIdentifierStart(int ch) {
 182         return (IsASCII(ch) && (isalpha(ch) || ch == '_')) || !IsASCII(ch);
 183 }
 184
 185 /* This isn't quite right for Unicode identifiers */
 186 static bool IsIdentifierContinue(int ch) {
 187         return (IsASCII(ch) && (isalnum(ch) || ch == '_')) || !IsASCII(ch);
 188 }
 189
 190 static void ScanWhitespace(Accessor& styler, Sci_Position& pos, Sci_Position max) {
 191         while (IsWhitespace(styler.SafeGetCharAt(pos, '\0')) && pos < max) {
 192                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 193                         styler.SetLineState(styler.GetLine(pos), 0);
 194                 pos++;
 195         }
 196         styler.ColourTo(pos-1, SCE_RUST_DEFAULT);
 197 }
 198
 199 static void GrabString(char* s, Accessor& styler, Sci_Position start, Sci_Position len) {
 200         for (Sci_Position ii = 0; ii < len; ii++)
 201                 s[ii] = styler[ii + start];
 202         s[len] = '\0';
 203 }
 204
 205 static void ScanIdentifier(Accessor& styler, Sci_Position& pos, WordList *keywords) {
 206         Sci_Position start = pos;
 207         while (IsIdentifierContinue(styler.SafeGetCharAt(pos, '\0')))
 208                 pos++;
 209
 210         if (styler.SafeGetCharAt(pos, '\0') == '!') {
 211                 pos++;
 212                 styler.ColourTo(pos - 1, SCE_RUST_MACRO);
 213         } else {
 214                 char s[MAX_RUST_IDENT_CHARS + 1];
 215                 Sci_Position len = pos - start;
 216                 len = len > MAX_RUST_IDENT_CHARS ? MAX_RUST_IDENT_CHARS : len;
 217                 GrabString(s, styler, start, len);
 218                 bool keyword = false;
 219                 for (int ii = 0; ii < NUM_RUST_KEYWORD_LISTS; ii++) {
 220                         if (keywords[ii].InList(s)) {
 221                                 styler.ColourTo(pos - 1, SCE_RUST_WORD + ii);
 222                                 keyword = true;
 223                                 break;
 224                         }
 225                 }
 226                 if (!keyword) {
 227                         styler.ColourTo(pos - 1, SCE_RUST_IDENTIFIER);
 228                 }
 229         }
 230 }
 231
 232 /* Scans a sequence of digits, returning true if it found any. */
 233 static bool ScanDigits(Accessor& styler, Sci_Position& pos, int base) {
 234         Sci_Position old_pos = pos;
 235         for (;;) {
 236                 int c = styler.SafeGetCharAt(pos, '\0');
 237                 if (IsADigit(c, base) || c == '_')
 238                         pos++;
 239                 else
 240                         break;
 241         }
 242         return old_pos != pos;
 243 }
 244
 245 /* Scans an integer and floating point literals. */
 246 static void ScanNumber(Accessor& styler, Sci_Position& pos) {
 247         int base = 10;
 248         int c = styler.SafeGetCharAt(pos, '\0');
 249         int n = styler.SafeGetCharAt(pos + 1, '\0');
 250         bool error = false;
 251         /* Scan the prefix, thus determining the base.
 252          * 10 is default if there's no prefix. */
 253         if (c == '0' && n == 'x') {
 254                 pos += 2;
 255                 base = 16;
 256         } else if (c == '0' && n == 'b') {
 257                 pos += 2;
 258                 base = 2;
 259         } else if (c == '0' && n == 'o') {
 260                 pos += 2;
 261                 base = 8;
 262         }
 263
 264         /* Scan initial digits. The literal is malformed if there are none. */
 265         error |= !ScanDigits(styler, pos, base);
 266         /* See if there's an integer suffix. We mimic the Rust's lexer
 267          * and munch it even if there was an error above. */
 268         c = styler.SafeGetCharAt(pos, '\0');
 269         if (c == 'u' || c == 'i') {
 270                 pos++;
 271                 c = styler.SafeGetCharAt(pos, '\0');
 272                 n = styler.SafeGetCharAt(pos + 1, '\0');
 273                 if (c == '8') {
 274                         pos++;
 275                 } else if (c == '1' && n == '6') {
 276                         pos += 2;
 277                 } else if (c == '3' && n == '2') {
 278                         pos += 2;
 279                 } else if (c == '6' && n == '4') {
 280                         pos += 2;
 281                 } else if (styler.Match(pos, "size")) {
 282                         pos += 4;
 283                 } else {
 284                         error = true;
 285                 }
 286         /* See if it's a floating point literal. These literals have to be base 10.
 287          */
 288         } else if (!error) {
 289                 /* If there's a period, it's a floating point literal unless it's
 290                  * followed by an identifier (meaning this is a method call, e.g.
 291                  * `1.foo()`) or another period, in which case it's a range (e.g. 1..2)
 292                  */
 293                 n = styler.SafeGetCharAt(pos + 1, '\0');
 294                 if (c == '.' && !(IsIdentifierStart(n) || n == '.')) {
 295                         error |= base != 10;
 296                         pos++;
 297                         /* It's ok to have no digits after the period. */
 298                         ScanDigits(styler, pos, 10);
 299                 }
 300
 301                 /* Look for the exponentiation. */
 302                 c = styler.SafeGetCharAt(pos, '\0');
 303                 if (c == 'e' || c == 'E') {
 304                         error |= base != 10;
 305                         pos++;
 306                         c = styler.SafeGetCharAt(pos, '\0');
 307                         if (c == '-' || c == '+')
 308                                 pos++;
 309                         /* It is invalid to have no digits in the exponent. */
 310                         error |= !ScanDigits(styler, pos, 10);
 311                 }
 312
 313                 /* Scan the floating point suffix. */
 314                 c = styler.SafeGetCharAt(pos, '\0');
 315                 if (c == 'f') {
 316                         error |= base != 10;
 317                         pos++;
 318                         c = styler.SafeGetCharAt(pos, '\0');
 319                         n = styler.SafeGetCharAt(pos + 1, '\0');
 320                         if (c == '3' && n == '2') {
 321                                 pos += 2;
 322                         } else if (c == '6' && n == '4') {
 323                                 pos += 2;
 324                         } else {
 325                                 error = true;
 326                         }
 327                 }
 328         }
 329
 330         if (error)
 331                 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 332         else
 333                 styler.ColourTo(pos - 1, SCE_RUST_NUMBER);
 334 }
 335
 336 static bool IsOneCharOperator(int c) {
 337         return c == ';' || c == ',' || c == '(' || c == ')'
 338             || c == '{' || c == '}' || c == '[' || c == ']'
 339             || c == '@' || c == '#' || c == '~' || c == '+'
 340             || c == '*' || c == '/' || c == '^' || c == '%'
 341             || c == '.' || c == ':' || c == '!' || c == '<'
 342             || c == '>' || c == '=' || c == '-' || c == '&'
 343             || c == '|' || c == '$' || c == '?';
 344 }
 345
 346 static bool IsTwoCharOperator(int c, int n) {
 347         return (c == '.' && n == '.') || (c == ':' && n == ':')
 348             || (c == '!' && n == '=') || (c == '<' && n == '<')
 349             || (c == '<' && n == '=') || (c == '>' && n == '>')
 350             || (c == '>' && n == '=') || (c == '=' && n == '=')
 351             || (c == '=' && n == '>') || (c == '-' && n == '>')
 352             || (c == '&' && n == '&') || (c == '|' && n == '|')
 353             || (c == '-' && n == '=') || (c == '&' && n == '=')
 354             || (c == '|' && n == '=') || (c == '+' && n == '=')
 355             || (c == '*' && n == '=') || (c == '/' && n == '=')
 356             || (c == '^' && n == '=') || (c == '%' && n == '=');
 357 }
 358
 359 static bool IsThreeCharOperator(int c, int n, int n2) {
 360         return (c == '<' && n == '<' && n2 == '=')
 361             || (c == '>' && n == '>' && n2 == '=');
 362 }
 363
 364 static bool IsValidCharacterEscape(int c) {
 365         return c == 'n'  || c == 'r' || c == 't' || c == '\\'
 366             || c == '\'' || c == '"' || c == '0';
 367 }
 368
 369 static bool IsValidStringEscape(int c) {
 370         return IsValidCharacterEscape(c) || c == '\n' || c == '\r';
 371 }
 372
 373 static bool ScanNumericEscape(Accessor &styler, Sci_Position& pos, Sci_Position num_digits, bool stop_asap) {
 374         for (;;) {
 375                 int c = styler.SafeGetCharAt(pos, '\0');
 376                 if (!IsADigit(c, 16))
 377                         break;
 378                 num_digits--;
 379                 pos++;
 380                 if (num_digits == 0 && stop_asap)
 381                         return true;
 382         }
 383         if (num_digits == 0) {
 384                 return true;
 385         } else {
 386                 return false;
 387         }
 388 }
 389
 390 /* This is overly permissive for character literals in order to accept UTF-8 encoded
 391  * character literals. */
 392 static void ScanCharacterLiteralOrLifetime(Accessor &styler, Sci_Position& pos, bool ascii_only) {
 393         pos++;
 394         int c = styler.SafeGetCharAt(pos, '\0');
 395         int n = styler.SafeGetCharAt(pos + 1, '\0');
 396         bool done = false;
 397         bool valid_lifetime = !ascii_only && IsIdentifierStart(c);
 398         bool valid_char = true;
 399         bool first = true;
 400         while (!done) {
 401                 switch (c) {
 402                         case '\\':
 403                                 done = true;
 404                                 if (IsValidCharacterEscape(n)) {
 405                                         pos += 2;
 406                                 } else if (n == 'x') {
 407                                         pos += 2;
 408                                         valid_char = ScanNumericEscape(styler, pos, 2, false);
 409                                 } else if (n == 'u' && !ascii_only) {
 410                                         pos += 2;
 411                                         if (styler.SafeGetCharAt(pos, '\0') != '{') {
 412                                                 // old-style
 413                                                 valid_char = ScanNumericEscape(styler, pos, 4, false);
 414                                         } else {
 415                                                 int n_digits = 0;
 416                                                 while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
 417                                                 }
 418                                                 if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
 419                                                         pos++;
 420                                                 else
 421                                                         valid_char = false;
 422                                         }
 423                                 } else if (n == 'U' && !ascii_only) {
 424                                         pos += 2;
 425                                         valid_char = ScanNumericEscape(styler, pos, 8, false);
 426                                 } else {
 427                                         valid_char = false;
 428                                 }
 429                                 break;
 430                         case '\'':
 431                                 valid_char = !first;
 432                                 done = true;
 433                                 break;
 434                         case '\t':
 435                         case '\n':
 436                         case '\r':
 437                         case '\0':
 438                                 valid_char = false;
 439                                 done = true;
 440                                 break;
 441                         default:
 442                                 if (ascii_only && !IsASCII((char)c)) {
 443                                         done = true;
 444                                         valid_char = false;
 445                                 } else if (!IsIdentifierContinue(c) && !first) {
 446                                         done = true;
 447                                 } else {
 448                                         pos++;
 449                                 }
 450                                 break;
 451                 }
 452                 c = styler.SafeGetCharAt(pos, '\0');
 453                 n = styler.SafeGetCharAt(pos + 1, '\0');
 454
 455                 first = false;
 456         }
 457         if (styler.SafeGetCharAt(pos, '\0') == '\'') {
 458                 valid_lifetime = false;
 459         } else {
 460                 valid_char = false;
 461         }
 462         if (valid_lifetime) {
 463                 styler.ColourTo(pos - 1, SCE_RUST_LIFETIME);
 464         } else if (valid_char) {
 465                 pos++;
 466                 styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTECHARACTER : SCE_RUST_CHARACTER);
 467         } else {
 468                 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 469         }
 470 }
 471
 472 enum CommentState {
 473         UnknownComment,
 474         DocComment,
 475         NotDocComment
 476 };
 477
 478 /*
 479  * The rule for block-doc comments is as follows: /xxN and /x! (where x is an asterisk, N is a non-asterisk) start doc comments.
 480  * Otherwise it's a regular comment.
 481  */
 482 static void ResumeBlockComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state, int level) {
 483         int c = styler.SafeGetCharAt(pos, '\0');
 484         bool maybe_doc_comment = false;
 485         if (c == '*') {
 486                 int n = styler.SafeGetCharAt(pos + 1, '\0');
 487                 if (n != '*' && n != '/') {
 488                         maybe_doc_comment = true;
 489                 }
 490         } else if (c == '!') {
 491                 maybe_doc_comment = true;
 492         }
 493
 494         for (;;) {
 495                 int n = styler.SafeGetCharAt(pos + 1, '\0');
 496                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 497                         styler.SetLineState(styler.GetLine(pos), level);
 498                 if (c == '*') {
 499                         pos++;
 500                         if (n == '/') {
 501                                 pos++;
 502                                 level--;
 503                                 if (level == 0) {
 504                                         styler.SetLineState(styler.GetLine(pos), 0);
 505                                         if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
 506                                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
 507                                         else
 508                                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
 509                                         break;
 510                                 }
 511                         }
 512                 } else if (c == '/') {
 513                         pos++;
 514                         if (n == '*') {
 515                                 pos++;
 516                                 level++;
 517                         }
 518                 }
 519                 else {
 520                         pos++;
 521                 }
 522                 if (pos >= max) {
 523                         if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
 524                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCKDOC);
 525                         else
 526                                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTBLOCK);
 527                         break;
 528                 }
 529                 c = styler.SafeGetCharAt(pos, '\0');
 530         }
 531 }
 532
 533 /*
 534  * The rule for line-doc comments is as follows... ///N and //! (where N is a non slash) start doc comments.
 535  * Otherwise it's a normal line comment.
 536  */
 537 static void ResumeLineComment(Accessor &styler, Sci_Position& pos, Sci_Position max, CommentState state) {
 538         bool maybe_doc_comment = false;
 539         int c = styler.SafeGetCharAt(pos, '\0');
 540         if (c == '/') {
 541                 if (pos < max) {
 542                         pos++;
 543                         c = styler.SafeGetCharAt(pos, '\0');
 544                         if (c != '/') {
 545                                 maybe_doc_comment = true;
 546                         }
 547                 }
 548         } else if (c == '!') {
 549                 maybe_doc_comment = true;
 550         }
 551
 552         while (pos < max && c != '\n') {
 553                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 554                         styler.SetLineState(styler.GetLine(pos), 0);
 555                 pos++;
 556                 c = styler.SafeGetCharAt(pos, '\0');
 557         }
 558
 559         if (state == DocComment || (state == UnknownComment && maybe_doc_comment))
 560                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINEDOC);
 561         else
 562                 styler.ColourTo(pos - 1, SCE_RUST_COMMENTLINE);
 563 }
 564
 565 static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max) {
 566         pos++;
 567         int c = styler.SafeGetCharAt(pos, '\0');
 568         pos++;
 569         if (c == '/')
 570                 ResumeLineComment(styler, pos, max, UnknownComment);
 571         else if (c == '*')
 572                 ResumeBlockComment(styler, pos, max, UnknownComment, 1);
 573 }
 574
 575 static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
 576         int c = styler.SafeGetCharAt(pos, '\0');
 577         bool error = false;
 578         while (c != '"' && !error) {
 579                 if (pos >= max) {
 580                         error = true;
 581                         break;
 582                 }
 583                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 584                         styler.SetLineState(styler.GetLine(pos), 0);
 585                 if (c == '\\') {
 586                         int n = styler.SafeGetCharAt(pos + 1, '\0');
 587                         if (IsValidStringEscape(n)) {
 588                                 pos += 2;
 589                         } else if (n == 'x') {
 590                                 pos += 2;
 591                                 error = !ScanNumericEscape(styler, pos, 2, true);
 592                         } else if (n == 'u' && !ascii_only) {
 593                                 pos += 2;
 594                                 if (styler.SafeGetCharAt(pos, '\0') != '{') {
 595                                         // old-style
 596                                         error = !ScanNumericEscape(styler, pos, 4, true);
 597                                 } else {
 598                                         int n_digits = 0;
 599                                         while (IsADigit(styler.SafeGetCharAt(++pos, '\0'), 16) && n_digits++ < 6) {
 600                                         }
 601                                         if (n_digits > 0 && styler.SafeGetCharAt(pos, '\0') == '}')
 602                                                 pos++;
 603                                         else
 604                                                 error = true;
 605                                 }
 606                         } else if (n == 'U' && !ascii_only) {
 607                                 pos += 2;
 608                                 error = !ScanNumericEscape(styler, pos, 8, true);
 609                         } else {
 610                                 pos += 1;
 611                                 error = true;
 612                         }
 613                 } else {
 614                         if (ascii_only && !IsASCII((char)c))
 615                                 error = true;
 616                         else
 617                                 pos++;
 618                 }
 619                 c = styler.SafeGetCharAt(pos, '\0');
 620         }
 621         if (!error)
 622                 pos++;
 623         styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);
 624 }
 625
 626 static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
 627         for (;;) {
 628                 if (pos == styler.LineEnd(styler.GetLine(pos)))
 629                         styler.SetLineState(styler.GetLine(pos), num_hashes);
 630
 631                 int c = styler.SafeGetCharAt(pos, '\0');
 632                 if (c == '"') {
 633                         pos++;
 634                         int trailing_num_hashes = 0;
 635                         while (styler.SafeGetCharAt(pos, '\0') == '#' && trailing_num_hashes < num_hashes) {
 636                                 trailing_num_hashes++;
 637                                 pos++;
 638                         }
 639                         if (trailing_num_hashes == num_hashes) {
 640                                 styler.SetLineState(styler.GetLine(pos), 0);
 641                                 break;
 642                         }
 643                 } else if (pos >= max) {
 644                         break;
 645                 } else {
 646                         if (ascii_only && !IsASCII((char)c))
 647                                 break;
 648                         pos++;
 649                 }
 650         }
 651         styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);
 652 }
 653
 654 static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
 655         pos++;
 656         int num_hashes = 0;
 657         while (styler.SafeGetCharAt(pos, '\0') == '#') {
 658                 num_hashes++;
 659                 pos++;
 660         }
 661         if (styler.SafeGetCharAt(pos, '\0') != '"') {
 662                 styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 663         } else {
 664                 pos++;
 665                 ResumeRawString(styler, pos, max, num_hashes, ascii_only);
 666         }
 667 }
 668
 669 void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
             // Styles the document range [startPos, startPos + length) with SCE_RUST_* styles.
             //
             // startPos  - position where lexing begins.
             // length    - number of characters to lex from startPos.
             // initStyle - style in effect at startPos; when it is a comment or string style,
             //             the matching Resume* helper is called before the main loop.
             // pAccess   - document interface, wrapped here in an Accessor.
 670         PropSetSimple props;
 671         Accessor styler(pAccess, &props);
 672         Sci_Position pos = startPos;
 673         Sci_Position max = pos + length;
 674
 675         styler.StartAt(pos);
 676         styler.StartSegment(pos);
 677
             // The range may begin inside a comment or string: hand it to the matching
             // Resume* helper first. Block comments and raw strings additionally receive the
             // line state stored for the previous line. For raw strings this is the argument
             // slot in which the ResumeRawString call earlier in this file passes num_hashes;
             // for block comments it is presumably a nesting level - TODO confirm against
             // ResumeBlockComment.
 678         if (initStyle == SCE_RUST_COMMENTBLOCK || initStyle == SCE_RUST_COMMENTBLOCKDOC) {
 679                 ResumeBlockComment(styler, pos, max, initStyle == SCE_RUST_COMMENTBLOCKDOC ? DocComment : NotDocComment, styler.GetLineState(styler.GetLine(pos) - 1));
 680         } else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
 681                 ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
 682         } else if (initStyle == SCE_RUST_STRING) {
 683                 ResumeString(styler, pos, max, false);
 684         } else if (initStyle == SCE_RUST_BYTESTRING) {
 685                 ResumeString(styler, pos, max, true);
 686         } else if (initStyle == SCE_RUST_STRINGR) {
 687                 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
 688         } else if (initStyle == SCE_RUST_BYTESTRINGR) {
 689                 ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
 690         }
 691
             // Main dispatch loop. The branch order is significant: the first matching test
             // wins. Branches that do not change pos themselves rely on the called helper to
             // advance it (NOTE(review): pos is presumably taken by reference - confirm).
 692         while (pos < max) {
                     // Current character plus two characters of lookahead; '\0' is the
                     // fallback value handed to SafeGetCharAt.
 693                 int c = styler.SafeGetCharAt(pos, '\0');
 694                 int n = styler.SafeGetCharAt(pos + 1, '\0');
 695                 int n2 = styler.SafeGetCharAt(pos + 2, '\0');
 696
                     // "#!" at position 0 that is not followed by '[': skip both characters and
                     // style the rest through the line-comment path.
                     // NOTE(review): looks like shebang handling, with "#![" left to the
                     // ordinary branches below - confirm.
 697                 if (pos == 0 && c == '#' && n == '!' && n2 != '[') {
 698                         pos += 2;
 699                         ResumeLineComment(styler, pos, max, NotDocComment);
 700                 } else if (IsWhitespace(c)) {
 701                         ScanWhitespace(styler, pos, max);
 702                 } else if (c == '/' && (n == '/' || n == '*')) {
 703                         ScanComments(styler, pos, max);
                     // The 'r' / 'br' / 'b"' / "b'" literal prefixes are tested before the
                     // identifier branch.
 704                 } else if (c == 'r' && (n == '#' || n == '"')) {
 705                         ScanRawString(styler, pos, max, false);
 706                 } else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
                             // Step over the 'b' so the helper starts at the 'r'.
 707                         pos++;
 708                         ScanRawString(styler, pos, max, true);
 709                 } else if (c == 'b' && n == '"') {
                             // Step over the 'b' and the opening quote.
 710                         pos += 2;
 711                         ResumeString(styler, pos, max, true);
 712                 } else if (c == 'b' && n == '\'') {
                             // Step over the 'b'; the helper starts at the quote.
 713                         pos++;
 714                         ScanCharacterLiteralOrLifetime(styler, pos, true);
 715                 } else if (IsIdentifierStart(c)) {
 716                         ScanIdentifier(styler, pos, keywords);
 717                 } else if (IsADigit(c)) {
 718                         ScanNumber(styler, pos);
                     // Operators are tried longest first: three, then two, then one character.
 719                 } else if (IsThreeCharOperator(c, n, n2)) {
 720                         pos += 3;
 721                         styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
 722                 } else if (IsTwoCharOperator(c, n)) {
 723                         pos += 2;
 724                         styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
 725                 } else if (IsOneCharOperator(c)) {
 726                         pos++;
 727                         styler.ColourTo(pos - 1, SCE_RUST_OPERATOR);
 728                 } else if (c == '\'') {
 729                         ScanCharacterLiteralOrLifetime(styler, pos, false);
 730                 } else if (c == '"') {
                             // Step over the opening quote before scanning the string body.
 731                         pos++;
 732                         ResumeString(styler, pos, max, false);
 733                 } else {
                             // Nothing matched: style this single character as a lexer error.
 734                         pos++;
 735                         styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
 736                 }
 737         }
             // Colour up to the last consumed position with the default style, then call
             // Flush on the accessor.
 738         styler.ColourTo(pos - 1, SCE_RUST_DEFAULT);
 739         styler.Flush();
 740 }
 741
 742 void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
             // Computes the fold level of every line touched by [startPos, startPos + length)
             // and stores it with styler.SetLevel. Returns immediately when options.fold is off.
             //
             // Each stored level word combines the line's own level (low bits), the level
             // for the following line shifted left by 16 (see where lev is built below), and
             // SC_FOLDLEVEL*FLAG bits.
             //
             // startPos  - position where folding begins.
             // length    - number of characters to examine from startPos.
             // initStyle - seeds `style`, so it becomes stylePrev for the first character.
             // pAccess   - document interface, wrapped here in a LexAccessor.
 743
 744         if (!options.fold)
 745                 return;
 746
 747         LexAccessor styler(pAccess);
 748
 749         Sci_PositionU endPos = startPos + length;
 750         int visibleChars = 0;
 751         bool inLineComment = false;
 752         Sci_Position lineCurrent = styler.GetLine(startPos);
 753         int levelCurrent = SC_FOLDLEVELBASE;
             // Continue from the "next line" half of the previous line's level word.
 754         if (lineCurrent > 0)
 755                 levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
             // Start of the following line; the character just before it is treated as the
             // end of the current line (see atEOL).
 756         Sci_PositionU lineStartNext = styler.LineStart(lineCurrent+1);
 757         int levelMinCurrent = levelCurrent;
 758         int levelNext = levelCurrent;
 759         char chNext = styler[startPos];
 760         int styleNext = styler.StyleAt(startPos);
 761         int style = initStyle;
             // Custom fold markers are used only when both the start and the end string are
             // non-empty.
 762         const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
 763         for (Sci_PositionU i = startPos; i < endPos; i++) {
                     // Slide the one-character window: previous / current / next style and
                     // current / next character.
 764                 char ch = chNext;
 765                 chNext = styler.SafeGetCharAt(i + 1);
 766                 int stylePrev = style;
 767                 style = styleNext;
 768                 styleNext = styler.StyleAt(i + 1);
 769                 bool atEOL = i == (lineStartNext-1);
                     // Once a line-comment style is seen, stream-comment folding is suppressed
                     // until the flag is reset at the end of the line.
 770                 if ((style == SCE_RUST_COMMENTLINE) || (style == SCE_RUST_COMMENTLINEDOC))
 771                         inLineComment = true;
                     // Stream (block) comments: +1 on a character whose predecessor style is not
                     // a stream comment, -1 when the next style is not one and this is not EOL.
 772                 if (options.foldComment && options.foldCommentMultiline && IsStreamCommentStyle(style) && !inLineComment) {
 773                         if (!IsStreamCommentStyle(stylePrev)) {
 774                                 levelNext++;
 775                         } else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
 776                                 // Comments don't end at end of line and the next character may be unstyled.
 777                                 levelNext--;
 778                         }
 779                 }
                     // Explicit fold points: checked inside SCE_RUST_COMMENTLINE text, or at any
                     // position when foldExplicitAnywhere is set.
 780                 if (options.foldComment && options.foldCommentExplicit && ((style == SCE_RUST_COMMENTLINE) || options.foldExplicitAnywhere)) {
 781                         if (userDefinedFoldMarkers) {
 782                                 if (styler.Match(i, options.foldExplicitStart.c_str())) {
 783                                         levelNext++;
 784                                 } else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
 785                                         levelNext--;
 786                                 }
 787                         } else {
                                     // Default markers: "//{" opens a fold, "//}" closes one.
 788                                 if ((ch == '/') && (chNext == '/')) {
 789                                         char chNext2 = styler.SafeGetCharAt(i + 2);
 790                                         if (chNext2 == '{') {
 791                                                 levelNext++;
 792                                         } else if (chNext2 == '}') {
 793                                                 levelNext--;
 794                                         }
 795                                 }
 796                         }
 797                 }
                     // Brace folding: only braces styled as SCE_RUST_OPERATOR are counted.
 798                 if (options.foldSyntaxBased && (style == SCE_RUST_OPERATOR)) {
 799                         if (ch == '{') {
 800                                 // Measure the minimum before a '{' to allow
 801                                 // folding on "} else {"
 802                                 if (levelMinCurrent > levelNext) {
 803                                         levelMinCurrent = levelNext;
 804                                 }
 805                                 levelNext++;
 806                         } else if (ch == '}') {
 807                                 levelNext--;
 808                         }
 809                 }
                     // Count non-space characters so an all-blank line can be flagged below.
 810                 if (!IsASpace(ch))
 811                         visibleChars++;
                     // End of the line, or last character of the requested range: build and
                     // store the level word for lineCurrent.
 812                 if (atEOL || (i == endPos-1)) {
 813                         int levelUse = levelCurrent;
                             // With foldAtElse, the line's own level is the minimum recorded
                             // before a '{' on this line instead of the level at line start.
 814                         if (options.foldSyntaxBased && options.foldAtElse) {
 815                                 levelUse = levelMinCurrent;
 816                         }
                             // Low bits: this line's level; bits 16 and up: level for the next line.
 817                         int lev = levelUse | levelNext << 16;
 818                         if (visibleChars == 0 && options.foldCompact)
 819                                 lev |= SC_FOLDLEVELWHITEFLAG;
                             // The line is a fold header when the level rises across it.
 820                         if (levelUse < levelNext)
 821                                 lev |= SC_FOLDLEVELHEADERFLAG;
                             // Write only when the stored value actually changes.
 822                         if (lev != styler.LevelAt(lineCurrent)) {
 823                                 styler.SetLevel(lineCurrent, lev);
 824                         }
                             // Move on to the next line and reset the per-line state.
 825                         lineCurrent++;
 826                         lineStartNext = styler.LineStart(lineCurrent+1);
 827                         levelCurrent = levelNext;
 828                         levelMinCurrent = levelCurrent;
 829                         if (atEOL && (i == static_cast<Sci_PositionU>(styler.Length()-1))) {
 830                                 // There is an empty line at end of file so give it same level and empty
 831                                 styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
 832                         }
 833                         visibleChars = 0;
 834                         inLineComment = false;
 835                 }
 836         }
 837 }
 838
 839 LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists); // Lexer module object for Rust: lexer id SCLEX_RUST, name "rust", created via LexerRust::LexerFactoryRust.