scintilla/lexers/LexHaskell.cxx

   1 /******************************************************************
   2  *    LexHaskell.cxx
   3  *
   4  *    A haskell lexer for the scintilla code control.
   5  *    Some stuff "lended" from LexPython.cxx and LexCPP.cxx.
   6  *    External lexer stuff inspired from the caml external lexer.
   7  *    Folder copied from Python's.
   8  *
   9  *    Written by Tobias Engvall - tumm at dtek dot chalmers dot se
  10  *
  11  *    Several bug fixes by Krasimir Angelov - kr.angelov at gmail.com
  12  *
  13  *    Improved by kudah <kudahkukarek@gmail.com>
  14  *
  15  *    TODO:
  16  *    * A proper lexical folder to fold group declarations, comments, pragmas,
  17  *      #ifdefs, explicit layout, lists, tuples, quasi-quotes, splces, etc, etc,
  18  *      etc.
  19  *
  20  *****************************************************************/
  21 #include <stdlib.h>
  22 #include <string.h>
  23 #include <stdio.h>
  24 #include <stdarg.h>
  25 #include <assert.h>
  26 #include <ctype.h>
  27
  28 #include <string>
  29 #include <map>
  30
  31 #include "ILexer.h"
  32 #include "Scintilla.h"
  33 #include "SciLexer.h"
  34
  35 #include "PropSetSimple.h"
  36 #include "WordList.h"
  37 #include "LexAccessor.h"
  38 #include "Accessor.h"
  39 #include "StyleContext.h"
  40 #include "CharacterSet.h"
  41 #include "CharacterCategory.h"
  42 #include "LexerModule.h"
  43 #include "OptionSet.h"
  44
  45 #ifdef SCI_NAMESPACE
  46 using namespace Scintilla;
  47 #endif
  48
  49 // See https://github.com/ghc/ghc/blob/master/compiler/parser/Lexer.x#L1682
  50 // Note, letter modifiers are prohibited.
  51
  52 static int u_iswupper (int ch) {
  53    CharacterCategory c = CategoriseCharacter(ch);
  54    return c == ccLu || c == ccLt;
  55 }
  56
  57 static int u_iswalpha (int ch) {
  58    CharacterCategory c = CategoriseCharacter(ch);
  59    return c == ccLl || c == ccLu || c == ccLt || c == ccLo;
  60 }
  61
  62 static int u_iswalnum (int ch) {
  63    CharacterCategory c = CategoriseCharacter(ch);
  64    return c == ccLl || c == ccLu || c == ccLt || c == ccLo
  65        || c == ccNd || c == ccNo;
  66 }
  67
  68 static int u_IsHaskellSymbol(int ch) {
  69    CharacterCategory c = CategoriseCharacter(ch);
  70    return c == ccPc || c == ccPd || c == ccPo
  71        || c == ccSm || c == ccSc || c == ccSk || c == ccSo;
  72 }
  73
  74 static inline bool IsHaskellLetter(const int ch) {
  75    if (IsASCII(ch)) {
  76       return (ch >= 'a' && ch <= 'z')
  77           || (ch >= 'A' && ch <= 'Z');
  78    } else {
  79       return u_iswalpha(ch) != 0;
  80    }
  81 }
  82
  83 static inline bool IsHaskellAlphaNumeric(const int ch) {
  84    if (IsASCII(ch)) {
  85       return IsAlphaNumeric(ch);
  86    } else {
  87       return u_iswalnum(ch) != 0;
  88    }
  89 }
  90
  91 static inline bool IsHaskellUpperCase(const int ch) {
  92    if (IsASCII(ch)) {
  93       return ch >= 'A' && ch <= 'Z';
  94    } else {
  95       return u_iswupper(ch) != 0;
  96    }
  97 }
  98
  99 static inline bool IsAnHaskellOperatorChar(const int ch) {
 100    if (IsASCII(ch)) {
 101       return
 102          (  ch == '!' || ch == '#' || ch == '$' || ch == '%'
 103          || ch == '&' || ch == '*' || ch == '+' || ch == '-'
 104          || ch == '.' || ch == '/' || ch == ':' || ch == '<'
 105          || ch == '=' || ch == '>' || ch == '?' || ch == '@'
 106          || ch == '^' || ch == '|' || ch == '~' || ch == '\\');
 107    } else {
 108       return u_IsHaskellSymbol(ch) != 0;
 109    }
 110 }
 111
 112 static inline bool IsAHaskellWordStart(const int ch) {
 113    return IsHaskellLetter(ch) || ch == '_';
 114 }
 115
 116 static inline bool IsAHaskellWordChar(const int ch) {
 117    return (  IsHaskellAlphaNumeric(ch)
 118           || ch == '_'
 119           || ch == '\'');
 120 }
 121
 122 static inline bool IsCommentBlockStyle(int style) {
 123    return (style >= SCE_HA_COMMENTBLOCK && style <= SCE_HA_COMMENTBLOCK3);
 124 }
 125
 126 static inline bool IsCommentStyle(int style) {
 127    return (style >= SCE_HA_COMMENTLINE && style <= SCE_HA_COMMENTBLOCK3)
 128        || ( style == SCE_HA_LITERATE_COMMENT
 129          || style == SCE_HA_LITERATE_CODEDELIM);
 130 }
 131
 132 // styles which do not belong to Haskell, but to external tools
 133 static inline bool IsExternalStyle(int style) {
 134    return ( style == SCE_HA_PREPROCESSOR
 135          || style == SCE_HA_LITERATE_COMMENT
 136          || style == SCE_HA_LITERATE_CODEDELIM);
 137 }
 138
 139 static inline int CommentBlockStyleFromNestLevel(const unsigned int nestLevel) {
 140    return SCE_HA_COMMENTBLOCK + (nestLevel % 3);
 141 }
 142
 143 // Mangled version of lexlib/Accessor.cxx IndentAmount.
 144 // Modified to treat comment blocks as whitespace
 145 // plus special case for commentline/preprocessor.
 146 static int HaskellIndentAmount(Accessor &styler, const int line) {
 147
 148    // Determines the indentation level of the current line
 149    // Comment blocks are treated as whitespace
 150
 151    int pos = styler.LineStart(line);
 152    int eol_pos = styler.LineStart(line + 1) - 1;
 153
 154    char ch = styler[pos];
 155    int style = styler.StyleAt(pos);
 156
 157    int indent = 0;
 158    bool inPrevPrefix = line > 0;
 159
 160    int posPrev = inPrevPrefix ? styler.LineStart(line-1) : 0;
 161
 162    while ((  ch == ' ' || ch == '\t'
 163           || IsCommentBlockStyle(style)
 164           || style == SCE_HA_LITERATE_CODEDELIM)
 165          && (pos < eol_pos)) {
 166       if (inPrevPrefix) {
 167          char chPrev = styler[posPrev++];
 168          if (chPrev != ' ' && chPrev != '\t') {
 169             inPrevPrefix = false;
 170          }
 171       }
 172       if (ch == '\t') {
 173          indent = (indent / 8 + 1) * 8;
 174       } else { // Space or comment block
 175          indent++;
 176       }
 177       pos++;
 178       ch = styler[pos];
 179       style = styler.StyleAt(pos);
 180    }
 181
 182    indent += SC_FOLDLEVELBASE;
 183    // if completely empty line or the start of a comment or preprocessor...
 184    if (  styler.LineStart(line) == styler.Length()
 185       || ch == ' '
 186       || ch == '\t'
 187       || ch == '\n'
 188       || ch == '\r'
 189       || IsCommentStyle(style)
 190       || style == SCE_HA_PREPROCESSOR)
 191       return indent | SC_FOLDLEVELWHITEFLAG;
 192    else
 193       return indent;
 194 }
 195
 196 struct OptionsHaskell {
 197    bool magicHash;
 198    bool allowQuotes;
 199    bool implicitParams;
 200    bool highlightSafe;
 201    bool cpp;
 202    bool stylingWithinPreprocessor;
 203    bool fold;
 204    bool foldComment;
 205    bool foldCompact;
 206    bool foldImports;
 207    OptionsHaskell() {
 208       magicHash = true;       // Widespread use, enabled by default.
 209       allowQuotes = true;     // Widespread use, enabled by default.
 210       implicitParams = false; // Fell out of favor, seldom used, disabled.
 211       highlightSafe = true;   // Moderately used, doesn't hurt to enable.
 212       cpp = true;             // Widespread use, enabled by default;
 213       stylingWithinPreprocessor = false;
 214       fold = false;
 215       foldComment = false;
 216       foldCompact = false;
 217       foldImports = false;
 218    }
 219 };
 220
 221 static const char * const haskellWordListDesc[] = {
 222    "Keywords",
 223    "FFI",
 224    "Reserved operators",
 225    0
 226 };
 227
 228 struct OptionSetHaskell : public OptionSet<OptionsHaskell> {
 229    OptionSetHaskell() {
 230       DefineProperty("lexer.haskell.allow.hash", &OptionsHaskell::magicHash,
 231          "Set to 0 to disallow the '#' character at the end of identifiers and "
 232          "literals with the haskell lexer "
 233          "(GHC -XMagicHash extension)");
 234
 235       DefineProperty("lexer.haskell.allow.quotes", &OptionsHaskell::allowQuotes,
 236          "Set to 0 to disable highlighting of Template Haskell name quotations "
 237          "and promoted constructors "
 238          "(GHC -XTemplateHaskell and -XDataKinds extensions)");
 239
 240       DefineProperty("lexer.haskell.allow.questionmark", &OptionsHaskell::implicitParams,
 241          "Set to 1 to allow the '?' character at the start of identifiers "
 242          "with the haskell lexer "
 243          "(GHC & Hugs -XImplicitParams extension)");
 244
 245       DefineProperty("lexer.haskell.import.safe", &OptionsHaskell::highlightSafe,
 246          "Set to 0 to disallow \"safe\" keyword in imports "
 247          "(GHC -XSafe, -XTrustworthy, -XUnsafe extensions)");
 248
 249       DefineProperty("lexer.haskell.cpp", &OptionsHaskell::cpp,
 250          "Set to 0 to disable C-preprocessor highlighting "
 251          "(-XCPP extension)");
 252
 253       DefineProperty("styling.within.preprocessor", &OptionsHaskell::stylingWithinPreprocessor,
 254          "For Haskell code, determines whether all preprocessor code is styled in the "
 255          "preprocessor style (0, the default) or only from the initial # to the end "
 256          "of the command word(1)."
 257          );
 258
 259       DefineProperty("fold", &OptionsHaskell::fold);
 260
 261       DefineProperty("fold.comment", &OptionsHaskell::foldComment);
 262
 263       DefineProperty("fold.compact", &OptionsHaskell::foldCompact);
 264
 265       DefineProperty("fold.haskell.imports", &OptionsHaskell::foldImports,
 266          "Set to 1 to enable folding of import declarations");
 267
 268       DefineWordListSets(haskellWordListDesc);
 269    }
 270 };
 271
 272 class LexerHaskell : public ILexer {
 273    bool literate;
 274    int firstImportLine;
 275    int firstImportIndent;
 276    WordList keywords;
 277    WordList ffi;
 278    WordList reserved_operators;
 279    OptionsHaskell options;
 280    OptionSetHaskell osHaskell;
 281
 282    enum HashCount {
 283        oneHash
 284       ,twoHashes
 285       ,unlimitedHashes
 286    };
 287
 288    enum KeywordMode {
 289        HA_MODE_DEFAULT = 0
 290       ,HA_MODE_IMPORT1 = 1 // after "import", before "qualified" or "safe" or package name or module name.
 291       ,HA_MODE_IMPORT2 = 2 // after module name, before "as" or "hiding".
 292       ,HA_MODE_IMPORT3 = 3 // after "as", before "hiding"
 293       ,HA_MODE_MODULE  = 4 // after "module", before module name.
 294       ,HA_MODE_FFI     = 5 // after "foreign", before FFI keywords
 295       ,HA_MODE_TYPE    = 6 // after "type" or "data", before "family"
 296    };
 297
 298    enum LiterateMode {
 299        LITERATE_BIRD  = 0 // if '>' is the first character on the line,
 300                           //   color '>' as a codedelim and the rest of
 301                           //   the line as code.
 302                           // else if "\begin{code}" is the only word on the
 303                           //    line except whitespace, switch to LITERATE_BLOCK
 304                           // otherwise color the line as a literate comment.
 305       ,LITERATE_BLOCK = 1 // if the string "\end{code}" is encountered at column
 306                           //   0 ignoring all later characters, color the line
 307                           //   as a codedelim and switch to LITERATE_BIRD
 308                           // otherwise color the line as code.
 309    };
 310
 311    struct HaskellLineInfo {
 312       unsigned int nestLevel; // 22 bits ought to be enough for anybody
 313       unsigned int nonexternalStyle; // 5 bits, widen if number of styles goes
 314                                      // beyond 31.
 315       bool pragma;
 316       LiterateMode lmode;
 317       KeywordMode mode;
 318
 319       HaskellLineInfo(int state) :
 320          nestLevel (state >> 10)
 321        , nonexternalStyle ((state >> 5) & 0x1F)
 322        , pragma ((state >> 4) & 0x1)
 323        , lmode (static_cast<LiterateMode>((state >> 3) & 0x1))
 324        , mode (static_cast<KeywordMode>(state & 0x7))
 325          {}
 326
 327       int ToLineState() {
 328          return
 329               (nestLevel << 10)
 330             | (nonexternalStyle << 5)
 331             | (pragma << 4)
 332             | (lmode << 3)
 333             | mode;
 334       }
 335    };
 336
 337    inline void skipMagicHash(StyleContext &sc, const HashCount hashes) const {
 338       if (options.magicHash && sc.ch == '#') {
 339          sc.Forward();
 340          if (hashes == twoHashes && sc.ch == '#') {
 341             sc.Forward();
 342          } else if (hashes == unlimitedHashes) {
 343             while (sc.ch == '#') {
 344                sc.Forward();
 345             }
 346          }
 347       }
 348    }
 349
 350    bool LineContainsImport(const int line, Accessor &styler) const {
 351       if (options.foldImports) {
 352          int currentPos = styler.LineStart(line);
 353          int style = styler.StyleAt(currentPos);
 354
 355          int eol_pos = styler.LineStart(line + 1) - 1;
 356
 357          while (currentPos < eol_pos) {
 358             int ch = styler[currentPos];
 359             style = styler.StyleAt(currentPos);
 360
 361             if (ch == ' ' || ch == '\t'
 362              || IsCommentBlockStyle(style)
 363              || style == SCE_HA_LITERATE_CODEDELIM) {
 364                currentPos++;
 365             } else {
 366                break;
 367             }
 368          }
 369
 370          return (style == SCE_HA_KEYWORD
 371               && styler.Match(currentPos, "import"));
 372       } else {
 373          return false;
 374       }
 375    }
 376
 377    inline int IndentAmountWithOffset(Accessor &styler, const int line) const {
 378       const int indent = HaskellIndentAmount(styler, line);
 379       const int indentLevel = indent & SC_FOLDLEVELNUMBERMASK;
 380       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
 381                ? indent
 382                : (indentLevel + firstImportIndent) | (indent & ~SC_FOLDLEVELNUMBERMASK);
 383    }
 384
 385    inline int IndentLevelRemoveIndentOffset(const int indentLevel) const {
 386       return indentLevel <= ((firstImportIndent - 1) + SC_FOLDLEVELBASE)
 387             ? indentLevel
 388             : indentLevel - firstImportIndent;
 389    }
 390
 391 public:
 392    LexerHaskell(bool literate_)
 393       : literate(literate_)
 394       , firstImportLine(-1)
 395       , firstImportIndent(0)
 396       {}
 397    virtual ~LexerHaskell() {}
 398
 399    void SCI_METHOD Release() {
 400       delete this;
 401    }
 402
 403    int SCI_METHOD Version() const {
 404       return lvOriginal;
 405    }
 406
 407    const char * SCI_METHOD PropertyNames() {
 408       return osHaskell.PropertyNames();
 409    }
 410
 411    int SCI_METHOD PropertyType(const char *name) {
 412       return osHaskell.PropertyType(name);
 413    }
 414
 415    const char * SCI_METHOD DescribeProperty(const char *name) {
 416       return osHaskell.DescribeProperty(name);
 417    }
 418
 419    int SCI_METHOD PropertySet(const char *key, const char *val);
 420
 421    const char * SCI_METHOD DescribeWordListSets() {
 422       return osHaskell.DescribeWordListSets();
 423    }
 424
 425    int SCI_METHOD WordListSet(int n, const char *wl);
 426
 427    void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 428
 429    void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
 430
 431    void * SCI_METHOD PrivateCall(int, void *) {
 432       return 0;
 433    }
 434
 435    static ILexer *LexerFactoryHaskell() {
 436       return new LexerHaskell(false);
 437    }
 438
 439    static ILexer *LexerFactoryLiterateHaskell() {
 440       return new LexerHaskell(true);
 441    }
 442 };
 443
 444 int SCI_METHOD LexerHaskell::PropertySet(const char *key, const char *val) {
 445    if (osHaskell.PropertySet(&options, key, val)) {
 446       return 0;
 447    }
 448    return -1;
 449 }
 450
 451 int SCI_METHOD LexerHaskell::WordListSet(int n, const char *wl) {
 452    WordList *wordListN = 0;
 453    switch (n) {
 454    case 0:
 455       wordListN = &keywords;
 456       break;
 457    case 1:
 458       wordListN = &ffi;
 459       break;
 460    case 2:
 461       wordListN = &reserved_operators;
 462       break;
 463    }
 464    int firstModification = -1;
 465    if (wordListN) {
 466       WordList wlNew;
 467       wlNew.Set(wl);
 468       if (*wordListN != wlNew) {
 469          wordListN->Set(wl);
 470          firstModification = 0;
 471       }
 472    }
 473    return firstModification;
 474 }
 475
 476 void SCI_METHOD LexerHaskell::Lex(unsigned int startPos, int length, int initStyle
 477                                  ,IDocument *pAccess) {
 478    LexAccessor styler(pAccess);
 479
 480    int lineCurrent = styler.GetLine(startPos);
 481
 482    HaskellLineInfo hs = HaskellLineInfo(lineCurrent ? styler.GetLineState(lineCurrent-1) : 0);
 483
 484    // Do not leak onto next line
 485    if (initStyle == SCE_HA_STRINGEOL)
 486       initStyle = SCE_HA_DEFAULT;
 487    else if (initStyle == SCE_HA_LITERATE_CODEDELIM)
 488       initStyle = hs.nonexternalStyle;
 489
 490    StyleContext sc(startPos, length, initStyle, styler);
 491
 492    int base = 10;
 493    bool dot = false;
 494
 495    bool inDashes = false;
 496    bool alreadyInTheMiddleOfOperator = false;
 497
 498    assert(!(IsCommentBlockStyle(initStyle) && hs.nestLevel == 0));
 499
 500    while (sc.More()) {
 501       // Check for state end
 502
 503       if (!IsExternalStyle(sc.state)) {
 504          hs.nonexternalStyle = sc.state;
 505       }
 506
 507       // For lexer to work, states should unconditionally forward at least one
 508       // character.
 509       // If they don't, they should still check if they are at line end and
 510       // forward if so.
 511       // If a state forwards more than one character, it should check every time
 512       // that it is not a line end and cease forwarding otherwise.
 513       if (sc.atLineEnd) {
 514          // Remember the line state for future incremental lexing
 515          styler.SetLineState(lineCurrent, hs.ToLineState());
 516          lineCurrent++;
 517       }
 518
 519       // Handle line continuation generically.
 520       if (sc.ch == '\\' && (sc.chNext == '\n' || sc.chNext == '\r')
 521          && (  sc.state == SCE_HA_STRING
 522             || sc.state == SCE_HA_PREPROCESSOR)) {
 523          // Remember the line state for future incremental lexing
 524          styler.SetLineState(lineCurrent, hs.ToLineState());
 525          lineCurrent++;
 526
 527          sc.Forward();
 528          if (sc.ch == '\r' && sc.chNext == '\n') {
 529             sc.Forward();
 530          }
 531          sc.Forward();
 532
 533          continue;
 534       }
 535
 536       if (sc.atLineStart) {
 537
 538          if (sc.state == SCE_HA_STRING || sc.state == SCE_HA_CHARACTER) {
 539             // Prevent SCE_HA_STRINGEOL from leaking back to previous line
 540             sc.SetState(sc.state);
 541          }
 542
 543          if (literate && hs.lmode == LITERATE_BIRD) {
 544             if (!IsExternalStyle(sc.state)) {
 545                sc.SetState(SCE_HA_LITERATE_COMMENT);
 546             }
 547          }
 548       }
 549
 550       // External
 551          // Literate
 552       if (  literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
 553          && sc.ch == '>') {
 554             sc.SetState(SCE_HA_LITERATE_CODEDELIM);
 555             sc.ForwardSetState(hs.nonexternalStyle);
 556       }
 557       else if (literate && hs.lmode == LITERATE_BIRD && sc.atLineStart
 558             && (  sc.ch == ' ' || sc.ch == '\t'
 559                || sc.Match("\\begin{code}"))) {
 560          sc.SetState(sc.state);
 561
 562          while ((sc.ch == ' ' || sc.ch == '\t') && sc.More())
 563             sc.Forward();
 564
 565          if (sc.Match("\\begin{code}")) {
 566             sc.Forward(static_cast<int>(strlen("\\begin{code}")));
 567
 568             bool correct = true;
 569
 570             while (!sc.atLineEnd && sc.More()) {
 571                if (sc.ch != ' ' && sc.ch != '\t') {
 572                   correct = false;
 573                }
 574                sc.Forward();
 575             }
 576
 577             if (correct) {
 578                sc.ChangeState(SCE_HA_LITERATE_CODEDELIM); // color the line end
 579                hs.lmode = LITERATE_BLOCK;
 580             }
 581          }
 582       }
 583       else if (literate && hs.lmode == LITERATE_BLOCK && sc.atLineStart
 584             && sc.Match("\\end{code}")) {
 585          sc.SetState(SCE_HA_LITERATE_CODEDELIM);
 586
 587          sc.Forward(static_cast<int>(strlen("\\end{code}")));
 588
 589          while (!sc.atLineEnd && sc.More()) {
 590             sc.Forward();
 591          }
 592
 593          sc.SetState(SCE_HA_LITERATE_COMMENT);
 594          hs.lmode = LITERATE_BIRD;
 595       }
 596          // Preprocessor
 597       else if (sc.atLineStart && sc.ch == '#' && options.cpp
 598             && (!options.stylingWithinPreprocessor || sc.state == SCE_HA_DEFAULT)) {
 599          sc.SetState(SCE_HA_PREPROCESSOR);
 600          sc.Forward();
 601       }
 602             // Literate
 603       else if (sc.state == SCE_HA_LITERATE_COMMENT) {
 604          sc.Forward();
 605       }
 606       else if (sc.state == SCE_HA_LITERATE_CODEDELIM) {
 607          sc.ForwardSetState(hs.nonexternalStyle);
 608       }
 609             // Preprocessor
 610       else if (sc.state == SCE_HA_PREPROCESSOR) {
 611          if (sc.atLineEnd) {
 612             sc.SetState(options.stylingWithinPreprocessor
 613                         ? SCE_HA_DEFAULT
 614                         : hs.nonexternalStyle);
 615             sc.Forward(); // prevent double counting a line
 616          } else if (options.stylingWithinPreprocessor && !IsHaskellLetter(sc.ch)) {
 617             sc.SetState(SCE_HA_DEFAULT);
 618          } else {
 619             sc.Forward();
 620          }
 621       }
 622       // Haskell
 623          // Operator
 624       else if (sc.state == SCE_HA_OPERATOR) {
 625          int style = SCE_HA_OPERATOR;
 626
 627          if ( sc.ch == ':'
 628             && !alreadyInTheMiddleOfOperator
 629             // except "::"
 630             && !( sc.chNext == ':'
 631                && !IsAnHaskellOperatorChar(sc.GetRelative(2)))) {
 632             style = SCE_HA_CAPITAL;
 633          }
 634
 635          alreadyInTheMiddleOfOperator = false;
 636
 637          while (IsAnHaskellOperatorChar(sc.ch))
 638                sc.Forward();
 639
 640          char s[100];
 641          sc.GetCurrent(s, sizeof(s));
 642
 643          if (reserved_operators.InList(s))
 644             style = SCE_HA_RESERVED_OPERATOR;
 645
 646          sc.ChangeState(style);
 647          sc.SetState(SCE_HA_DEFAULT);
 648       }
 649          // String
 650       else if (sc.state == SCE_HA_STRING) {
 651          if (sc.atLineEnd) {
 652             sc.ChangeState(SCE_HA_STRINGEOL);
 653             sc.ForwardSetState(SCE_HA_DEFAULT);
 654          } else if (sc.ch == '\"') {
 655             sc.Forward();
 656             skipMagicHash(sc, oneHash);
 657             sc.SetState(SCE_HA_DEFAULT);
 658          } else if (sc.ch == '\\') {
 659             sc.Forward(2);
 660          } else {
 661             sc.Forward();
 662          }
 663       }
 664          // Char
 665       else if (sc.state == SCE_HA_CHARACTER) {
 666          if (sc.atLineEnd) {
 667             sc.ChangeState(SCE_HA_STRINGEOL);
 668             sc.ForwardSetState(SCE_HA_DEFAULT);
 669          } else if (sc.ch == '\'') {
 670             sc.Forward();
 671             skipMagicHash(sc, oneHash);
 672             sc.SetState(SCE_HA_DEFAULT);
 673          } else if (sc.ch == '\\') {
 674             sc.Forward(2);
 675          } else {
 676             sc.Forward();
 677          }
 678       }
 679          // Number
 680       else if (sc.state == SCE_HA_NUMBER) {
 681          if (sc.atLineEnd) {
 682             sc.SetState(SCE_HA_DEFAULT);
 683             sc.Forward(); // prevent double counting a line
 684          } else if (IsADigit(sc.ch, base)) {
 685             sc.Forward();
 686          } else if (sc.ch=='.' && dot && IsADigit(sc.chNext, base)) {
 687             sc.Forward(2);
 688             dot = false;
 689          } else if ((base == 10) &&
 690                     (sc.ch == 'e' || sc.ch == 'E') &&
 691                     (IsADigit(sc.chNext) || sc.chNext == '+' || sc.chNext == '-')) {
 692             sc.Forward();
 693             if (sc.ch == '+' || sc.ch == '-')
 694                 sc.Forward();
 695          } else {
 696             skipMagicHash(sc, twoHashes);
 697             sc.SetState(SCE_HA_DEFAULT);
 698          }
 699       }
 700          // Keyword or Identifier
 701       else if (sc.state == SCE_HA_IDENTIFIER) {
 702          int style = IsHaskellUpperCase(sc.ch) ? SCE_HA_CAPITAL : SCE_HA_IDENTIFIER;
 703
 704          assert(IsAHaskellWordStart(sc.ch));
 705
 706          sc.Forward();
 707
 708          while (sc.More()) {
 709             if (IsAHaskellWordChar(sc.ch)) {
 710                sc.Forward();
 711             } else if (sc.ch == '.' && style == SCE_HA_CAPITAL) {
 712                if (IsHaskellUpperCase(sc.chNext)) {
 713                   sc.Forward();
 714                   style = SCE_HA_CAPITAL;
 715                } else if (IsAHaskellWordStart(sc.chNext)) {
 716                   sc.Forward();
 717                   style = SCE_HA_IDENTIFIER;
 718                } else if (IsAnHaskellOperatorChar(sc.chNext)) {
 719                   sc.Forward();
 720                   style = sc.ch == ':' ? SCE_HA_CAPITAL : SCE_HA_OPERATOR;
 721                   while (IsAnHaskellOperatorChar(sc.ch))
 722                      sc.Forward();
 723                   break;
 724                } else {
 725                   break;
 726                }
 727             } else {
 728                break;
 729             }
 730          }
 731
 732          skipMagicHash(sc, unlimitedHashes);
 733
 734          char s[100];
 735          sc.GetCurrent(s, sizeof(s));
 736
 737          KeywordMode new_mode = HA_MODE_DEFAULT;
 738
 739          if (keywords.InList(s)) {
 740             style = SCE_HA_KEYWORD;
 741          } else if (style == SCE_HA_CAPITAL) {
 742             if (hs.mode == HA_MODE_IMPORT1 || hs.mode == HA_MODE_IMPORT3) {
 743                style    = SCE_HA_MODULE;
 744                new_mode = HA_MODE_IMPORT2;
 745             } else if (hs.mode == HA_MODE_MODULE) {
 746                style = SCE_HA_MODULE;
 747             }
 748          } else if (hs.mode == HA_MODE_IMPORT1 &&
 749                     strcmp(s,"qualified") == 0) {
 750              style    = SCE_HA_KEYWORD;
 751              new_mode = HA_MODE_IMPORT1;
 752          } else if (options.highlightSafe &&
 753                     hs.mode == HA_MODE_IMPORT1 &&
 754                     strcmp(s,"safe") == 0) {
 755              style    = SCE_HA_KEYWORD;
 756              new_mode = HA_MODE_IMPORT1;
 757          } else if (hs.mode == HA_MODE_IMPORT2) {
 758              if (strcmp(s,"as") == 0) {
 759                 style    = SCE_HA_KEYWORD;
 760                 new_mode = HA_MODE_IMPORT3;
 761             } else if (strcmp(s,"hiding") == 0) {
 762                 style     = SCE_HA_KEYWORD;
 763             }
 764          } else if (hs.mode == HA_MODE_TYPE) {
 765             if (strcmp(s,"family") == 0)
 766                style    = SCE_HA_KEYWORD;
 767          }
 768
 769          if (hs.mode == HA_MODE_FFI) {
 770             if (ffi.InList(s)) {
 771                style = SCE_HA_KEYWORD;
 772                new_mode = HA_MODE_FFI;
 773             }
 774          }
 775
 776          sc.ChangeState(style);
 777          sc.SetState(SCE_HA_DEFAULT);
 778
 779          if (strcmp(s,"import") == 0 && hs.mode != HA_MODE_FFI)
 780             new_mode = HA_MODE_IMPORT1;
 781          else if (strcmp(s,"module") == 0)
 782             new_mode = HA_MODE_MODULE;
 783          else if (strcmp(s,"foreign") == 0)
 784             new_mode = HA_MODE_FFI;
 785          else if (strcmp(s,"type") == 0
 786                || strcmp(s,"data") == 0)
 787             new_mode = HA_MODE_TYPE;
 788
 789          hs.mode = new_mode;
 790       }
 791
 792          // Comments
 793             // Oneliner
 794       else if (sc.state == SCE_HA_COMMENTLINE) {
 795          if (sc.atLineEnd) {
 796             sc.SetState(hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT);
 797             sc.Forward(); // prevent double counting a line
 798          } else if (inDashes && sc.ch != '-' && !hs.pragma) {
 799             inDashes = false;
 800             if (IsAnHaskellOperatorChar(sc.ch)) {
 801                alreadyInTheMiddleOfOperator = true;
 802                sc.ChangeState(SCE_HA_OPERATOR);
 803             }
 804          } else {
 805             sc.Forward();
 806          }
 807       }
 808             // Nested
 809       else if (IsCommentBlockStyle(sc.state)) {
 810          if (sc.Match('{','-')) {
 811             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
 812             sc.Forward(2);
 813             hs.nestLevel++;
 814          } else if (sc.Match('-','}')) {
 815             sc.Forward(2);
 816             assert(hs.nestLevel > 0);
 817             if (hs.nestLevel > 0)
 818                hs.nestLevel--;
 819             sc.SetState(
 820                hs.nestLevel == 0
 821                   ? (hs.pragma ? SCE_HA_PRAGMA : SCE_HA_DEFAULT)
 822                   : CommentBlockStyleFromNestLevel(hs.nestLevel - 1));
 823          } else {
 824             sc.Forward();
 825          }
 826       }
 827             // Pragma
 828       else if (sc.state == SCE_HA_PRAGMA) {
 829          if (sc.Match("#-}")) {
 830             hs.pragma = false;
 831             sc.Forward(3);
 832             sc.SetState(SCE_HA_DEFAULT);
 833          } else if (sc.Match('-','-')) {
 834             sc.SetState(SCE_HA_COMMENTLINE);
 835             sc.Forward(2);
 836             inDashes = false;
 837          } else if (sc.Match('{','-')) {
 838             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
 839             sc.Forward(2);
 840             hs.nestLevel = 1;
 841          } else {
 842             sc.Forward();
 843          }
 844       }
 845             // New state?
 846       else if (sc.state == SCE_HA_DEFAULT) {
 847          // Digit
 848          if (IsADigit(sc.ch)) {
 849             hs.mode = HA_MODE_DEFAULT;
 850
 851             sc.SetState(SCE_HA_NUMBER);
 852             if (sc.ch == '0' && (sc.chNext == 'X' || sc.chNext == 'x')) {
 853                // Match anything starting with "0x" or "0X", too
 854                sc.Forward(2);
 855                base = 16;
 856                dot = false;
 857             } else if (sc.ch == '0' && (sc.chNext == 'O' || sc.chNext == 'o')) {
 858                // Match anything starting with "0o" or "0O", too
 859                sc.Forward(2);
 860                base = 8;
 861                dot = false;
 862             } else {
 863                sc.Forward();
 864                base = 10;
 865                dot = true;
 866             }
 867          }
 868          // Pragma
 869          else if (sc.Match("{-#")) {
 870             hs.pragma = true;
 871             sc.SetState(SCE_HA_PRAGMA);
 872             sc.Forward(3);
 873          }
 874          // Comment line
 875          else if (sc.Match('-','-')) {
 876             sc.SetState(SCE_HA_COMMENTLINE);
 877             sc.Forward(2);
 878             inDashes = true;
 879          }
 880          // Comment block
 881          else if (sc.Match('{','-')) {
 882             sc.SetState(CommentBlockStyleFromNestLevel(hs.nestLevel));
 883             sc.Forward(2);
 884             hs.nestLevel = 1;
 885          }
 886          // String
 887          else if (sc.ch == '\"') {
 888             sc.SetState(SCE_HA_STRING);
 889             sc.Forward();
 890          }
 891          // Character or quoted name or promoted term
 892          else if (sc.ch == '\'') {
 893             hs.mode = HA_MODE_DEFAULT;
 894
 895             sc.SetState(SCE_HA_CHARACTER);
 896             sc.Forward();
 897
 898             if (options.allowQuotes) {
 899                // Quoted type ''T
 900                if (sc.ch=='\'' && IsAHaskellWordStart(sc.chNext)) {
 901                   sc.Forward();
 902                   sc.ChangeState(SCE_HA_IDENTIFIER);
 903                } else if (sc.chNext != '\'') {
 904                   // Quoted name 'n or promoted constructor 'N
 905                   if (IsAHaskellWordStart(sc.ch)) {
 906                      sc.ChangeState(SCE_HA_IDENTIFIER);
 907                   // Promoted constructor operator ':~>
 908                   } else if (sc.ch == ':') {
 909                      alreadyInTheMiddleOfOperator = false;
 910                      sc.ChangeState(SCE_HA_OPERATOR);
 911                   // Promoted list or tuple '[T]
 912                   } else if (sc.ch == '[' || sc.ch== '(') {
 913                      sc.ChangeState(SCE_HA_OPERATOR);
 914                      sc.ForwardSetState(SCE_HA_DEFAULT);
 915                   }
 916                }
 917             }
 918          }
 919          // Operator starting with '?' or an implicit parameter
 920          else if (sc.ch == '?') {
 921             hs.mode = HA_MODE_DEFAULT;
 922
 923             alreadyInTheMiddleOfOperator = false;
 924             sc.SetState(SCE_HA_OPERATOR);
 925
 926             if (  options.implicitParams
 927                && IsAHaskellWordStart(sc.chNext)
 928                && !IsHaskellUpperCase(sc.chNext)) {
 929                sc.Forward();
 930                sc.ChangeState(SCE_HA_IDENTIFIER);
 931             }
 932          }
 933          // Operator
 934          else if (IsAnHaskellOperatorChar(sc.ch)) {
 935             hs.mode = HA_MODE_DEFAULT;
 936
 937             sc.SetState(SCE_HA_OPERATOR);
 938          }
 939          // Braces and punctuation
 940          else if (sc.ch == ',' || sc.ch == ';'
 941                || sc.ch == '(' || sc.ch == ')'
 942                || sc.ch == '[' || sc.ch == ']'
 943                || sc.ch == '{' || sc.ch == '}') {
 944             sc.SetState(SCE_HA_OPERATOR);
 945             sc.ForwardSetState(SCE_HA_DEFAULT);
 946          }
 947          // Keyword or Identifier
 948          else if (IsAHaskellWordStart(sc.ch)) {
 949             sc.SetState(SCE_HA_IDENTIFIER);
 950          // Something we don't care about
 951          } else {
 952             sc.Forward();
 953          }
 954       }
 955             // This branch should never be reached.
 956       else {
 957          assert(false);
 958          sc.Forward();
 959       }
 960    }
 961    sc.Complete();
 962 }
 963
 964 void SCI_METHOD LexerHaskell::Fold(unsigned int startPos, int length, int // initStyle
 965                                   ,IDocument *pAccess) {
 966    if (!options.fold)
 967       return;
 968
 969    Accessor styler(pAccess, NULL);
 970
 971    int lineCurrent = styler.GetLine(startPos);
 972
 973    if (lineCurrent <= firstImportLine) {
 974       firstImportLine = -1; // readjust first import position
 975       firstImportIndent = 0;
 976    }
 977
 978    const int maxPos = startPos + length;
 979    const int maxLines =
 980       maxPos == styler.Length()
 981          ? styler.GetLine(maxPos)
 982          : styler.GetLine(maxPos - 1);  // Requested last line
 983    const int docLines = styler.GetLine(styler.Length()); // Available last line
 984
 985    // Backtrack to previous non-blank line so we can determine indent level
 986    // for any white space lines
 987    // and so we can fix any preceding fold level (which is why we go back
 988    // at least one line in all cases)
 989    bool importHere = LineContainsImport(lineCurrent, styler);
 990    int indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
 991
 992    while (lineCurrent > 0) {
 993       lineCurrent--;
 994       importHere = LineContainsImport(lineCurrent, styler);
 995       indentCurrent = IndentAmountWithOffset(styler, lineCurrent);
 996       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG))
 997          break;
 998    }
 999
1000    int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
1001
1002    if (importHere) {
1003       indentCurrentLevel = IndentLevelRemoveIndentOffset(indentCurrentLevel);
1004       if (firstImportLine == -1) {
1005          firstImportLine = lineCurrent;
1006          firstImportIndent = (1 + indentCurrentLevel) - SC_FOLDLEVELBASE;
1007       }
1008       if (firstImportLine != lineCurrent) {
1009          indentCurrentLevel++;
1010       }
1011    }
1012
1013    indentCurrent = indentCurrentLevel | (indentCurrent & ~SC_FOLDLEVELNUMBERMASK);
1014
1015    // Process all characters to end of requested range
1016    //that hangs over the end of the range.  Cap processing in all cases
1017    // to end of document.
1018    while (lineCurrent <= docLines && lineCurrent <= maxLines) {
1019
1020       // Gather info
1021       int lineNext = lineCurrent + 1;
1022       importHere = false;
1023       int indentNext = indentCurrent;
1024
1025       if (lineNext <= docLines) {
1026          // Information about next line is only available if not at end of document
1027          importHere = LineContainsImport(lineNext, styler);
1028          indentNext = IndentAmountWithOffset(styler, lineNext);
1029       }
1030       if (indentNext & SC_FOLDLEVELWHITEFLAG)
1031          indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
1032
1033       // Skip past any blank lines for next indent level info; we skip also
1034       // comments (all comments, not just those starting in column 0)
1035       // which effectively folds them into surrounding code rather
1036       // than screwing up folding.
1037
1038       while (lineNext < docLines && (indentNext & SC_FOLDLEVELWHITEFLAG)) {
1039          lineNext++;
1040          importHere = LineContainsImport(lineNext, styler);
1041          indentNext = IndentAmountWithOffset(styler, lineNext);
1042       }
1043
1044       int indentNextLevel = indentNext & SC_FOLDLEVELNUMBERMASK;
1045
1046       if (importHere) {
1047          indentNextLevel = IndentLevelRemoveIndentOffset(indentNextLevel);
1048          if (firstImportLine == -1) {
1049             firstImportLine = lineNext;
1050             firstImportIndent = (1 + indentNextLevel) - SC_FOLDLEVELBASE;
1051          }
1052          if (firstImportLine != lineNext) {
1053             indentNextLevel++;
1054          }
1055       }
1056
1057       indentNext = indentNextLevel | (indentNext & ~SC_FOLDLEVELNUMBERMASK);
1058
1059       const int levelBeforeComments = Maximum(indentCurrentLevel,indentNextLevel);
1060
1061       // Now set all the indent levels on the lines we skipped
1062       // Do this from end to start.  Once we encounter one line
1063       // which is indented more than the line after the end of
1064       // the comment-block, use the level of the block before
1065
1066       int skipLine = lineNext;
1067       int skipLevel = indentNextLevel;
1068
1069       while (--skipLine > lineCurrent) {
1070          int skipLineIndent = IndentAmountWithOffset(styler, skipLine);
1071
1072          if (options.foldCompact) {
1073             if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel) {
1074                skipLevel = levelBeforeComments;
1075             }
1076
1077             int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
1078
1079             styler.SetLevel(skipLine, skipLevel | whiteFlag);
1080          } else {
1081             if (  (skipLineIndent & SC_FOLDLEVELNUMBERMASK) > indentNextLevel
1082                && !(skipLineIndent & SC_FOLDLEVELWHITEFLAG)) {
1083                skipLevel = levelBeforeComments;
1084             }
1085
1086             styler.SetLevel(skipLine, skipLevel);
1087          }
1088       }
1089
1090       int lev = indentCurrent;
1091
1092       if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG)) {
1093          if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
1094             lev |= SC_FOLDLEVELHEADERFLAG;
1095       }
1096
1097       // Set fold level for this line and move to next line
1098       styler.SetLevel(lineCurrent, options.foldCompact ? lev : lev & ~SC_FOLDLEVELWHITEFLAG);
1099
1100       indentCurrent = indentNext;
1101       indentCurrentLevel = indentNextLevel;
1102       lineCurrent = lineNext;
1103    }
1104
1105    // NOTE: Cannot set level of last line here because indentCurrent doesn't have
1106    // header flag set; the loop above is crafted to take care of this case!
1107    //styler.SetLevel(lineCurrent, indentCurrent);
1108 }
1109
1110 LexerModule lmHaskell(SCLEX_HASKELL, LexerHaskell::LexerFactoryHaskell, "haskell", haskellWordListDesc);
1111 LexerModule lmLiterateHaskell(SCLEX_LITERATEHASKELL, LexerHaskell::LexerFactoryLiterateHaskell, "literatehaskell", haskellWordListDesc);