1 // Scintilla source code edit control
3 ** Lexer for Objective Caml.
5 // Copyright 2005-2009 by Robert Roessler <robertr@rftp.com>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 20050204 Initial release.
9 20050205 Quick compiler standards/"cleanliness" adjustment.
10 20050206 Added cast for IsLeadByte().
11 20050209 Changes to "external" build support.
12 20050306 Fix for 1st-char-in-doc "corner" case.
13 20050502 Fix for [harmless] one-past-the-end coloring.
14 20050515 Refined numeric token recognition logic.
15 20051125 Added 2nd "optional" keywords class.
16 20051129 Support "magic" (read-only) comments for RCaml.
17 20051204 Swtich to using StyleContext infrastructure.
18 20090629 Add full Standard ML '97 support.
29 #include "Scintilla.h"
32 #include "PropSetSimple.h"
34 #include "LexAccessor.h"
36 #include "StyleContext.h"
37 #include "CharacterSet.h"
38 #include "LexerModule.h"
40 // Since the Microsoft __iscsym[f] funcs are not ANSI...
41 inline int iscaml(int c
) {return isalnum(c
) || c
== '_';}
42 inline int iscamlf(int c
) {return isalpha(c
) || c
== '_';}
44 static const int baseT
[24] = {
45 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
46 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */
50 using namespace Scintilla
;
53 #ifdef BUILD_AS_EXTERNAL_LEXER
55 (actually seems to work!)
58 #include "WindowAccessor.h"
59 #include "ExternalLexer.h"
62 #define EXT_LEXER_DECL __declspec( dllexport ) __stdcall
68 static void ColouriseCamlDoc(
69 Sci_PositionU startPos
, Sci_Position length
,
71 WordList
*keywordlists
[],
74 static void FoldCamlDoc(
75 Sci_PositionU startPos
, Sci_Position length
,
77 WordList
*keywordlists
[],
80 static void InternalLexOrFold(int lexOrFold
, Sci_PositionU startPos
, Sci_Position length
,
81 int initStyle
, char *words
[], WindowID window
, char *props
);
83 static const char* LexerName
= "caml";
86 void Platform::DebugPrintf(const char *format
, ...) {
89 va_start(pArguments
, format
);
90 vsprintf(buffer
,format
,pArguments
);
92 Platform::DebugDisplay(buffer
);
95 void Platform::DebugPrintf(const char *, ...) {
99 bool Platform::IsDBCSLeadByte(int codePage
, char ch
) {
100 return ::IsDBCSLeadByteEx(codePage
, ch
) != 0;
103 long Platform::SendScintilla(WindowID w
, unsigned int msg
, unsigned long wParam
, long lParam
) {
104 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
, lParam
);
107 long Platform::SendScintillaPointer(WindowID w
, unsigned int msg
, unsigned long wParam
, void *lParam
) {
108 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
,
109 reinterpret_cast<LPARAM
>(lParam
));
112 void EXT_LEXER_DECL
Fold(unsigned int lexer
, Sci_PositionU startPos
, Sci_Position length
,
113 int initStyle
, char *words
[], WindowID window
, char *props
)
115 // below useless evaluation(s) to supress "not used" warnings
117 // build expected data structures and do the Fold
118 InternalLexOrFold(1, startPos
, length
, initStyle
, words
, window
, props
);
122 int EXT_LEXER_DECL
GetLexerCount()
124 return 1; // just us [Objective] Caml lexers here!
127 void EXT_LEXER_DECL
GetLexerName(unsigned int Index
, char *name
, int buflength
)
129 // below useless evaluation(s) to supress "not used" warnings
131 // return as much of our lexer name as will fit (what's up with Index?)
134 int n
= strlen(LexerName
);
137 memcpy(name
, LexerName
, n
), name
[n
] = '\0';
141 void EXT_LEXER_DECL
Lex(unsigned int lexer
, Sci_PositionU startPos
, Sci_Position length
,
142 int initStyle
, char *words
[], WindowID window
, char *props
)
144 // below useless evaluation(s) to supress "not used" warnings
146 // build expected data structures and do the Lex
147 InternalLexOrFold(0, startPos
, length
, initStyle
, words
, window
, props
);
150 static void InternalLexOrFold(int foldOrLex
, Sci_PositionU startPos
, Sci_Position length
,
151 int initStyle
, char *words
[], WindowID window
, char *props
)
153 // create and initialize a WindowAccessor (including contained PropSet)
155 ps
.SetMultiple(props
);
156 WindowAccessor
wa(window
, ps
);
157 // create and initialize WordList(s)
159 for (; words
[nWL
]; nWL
++) ; // count # of WordList PTRs needed
160 WordList
** wl
= new WordList
* [nWL
+ 1];// alloc WordList PTRs
162 for (; i
< nWL
; i
++) {
163 wl
[i
] = new WordList(); // (works or THROWS bad_alloc EXCEPTION)
164 wl
[i
]->Set(words
[i
]);
167 // call our "internal" folder/lexer (... then do Flush!)
169 FoldCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
171 ColouriseCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
173 // clean up before leaving
174 for (i
= nWL
- 1; i
>= 0; i
--)
180 #endif /* BUILD_AS_EXTERNAL_LEXER */
182 void ColouriseCamlDoc(
183 Sci_PositionU startPos
, Sci_Position length
,
185 WordList
*keywordlists
[],
189 StyleContext
sc(startPos
, length
, initStyle
, styler
);
191 Sci_PositionU chToken
= 0;
192 int chBase
= 0, chLit
= 0;
193 WordList
& keywords
= *keywordlists
[0];
194 WordList
& keywords2
= *keywordlists
[1];
195 WordList
& keywords3
= *keywordlists
[2];
196 const bool isSML
= keywords
.InList("andalso");
197 const int useMagic
= styler
.GetPropertyInt("lexer.caml.magic", 0);
199 // set up [initial] state info (terminating states that shouldn't "bleed")
200 const int state_
= sc
.state
& 0x0f;
201 if (state_
<= SCE_CAML_CHAR
202 || (isSML
&& state_
== SCE_CAML_STRING
))
203 sc
.state
= SCE_CAML_DEFAULT
;
204 int nesting
= (state_
>= SCE_CAML_COMMENT
)? (state_
- SCE_CAML_COMMENT
): 0;
206 // foreach char in range...
208 // set up [per-char] state info
209 int state2
= -1; // (ASSUME no state change)
210 Sci_Position chColor
= sc
.currentPos
- 1;// (ASSUME standard coloring range)
211 bool advance
= true; // (ASSUME scanner "eats" 1 char)
213 // step state machine
214 switch (sc
.state
& 0x0f) {
215 case SCE_CAML_DEFAULT
:
216 chToken
= sc
.currentPos
; // save [possible] token start (JIC)
217 // it's wide open; what do we have?
219 state2
= SCE_CAML_IDENTIFIER
;
220 else if (!isSML
&& sc
.Match('`') && iscamlf(sc
.chNext
))
221 state2
= SCE_CAML_TAGNAME
;
222 else if (!isSML
&& sc
.Match('#') && isdigit(sc
.chNext
))
223 state2
= SCE_CAML_LINENUM
;
224 else if (isdigit(sc
.ch
)) {
225 // it's a number, assume base 10
226 state2
= SCE_CAML_NUMBER
, chBase
= 10;
228 // there MAY be a base specified...
229 const char* baseC
= "bBoOxX";
231 if (sc
.chNext
== 'w')
232 sc
.Forward(); // (consume SML "word" indicator)
235 // ... change to specified base AS REQUIRED
236 if (strchr(baseC
, sc
.chNext
))
237 chBase
= baseT
[tolower(sc
.chNext
) - 'a'], sc
.Forward();
239 } else if (!isSML
&& sc
.Match('\'')) // (Caml char literal?)
240 state2
= SCE_CAML_CHAR
, chLit
= 0;
241 else if (isSML
&& sc
.Match('#', '"')) // (SML char literal?)
242 state2
= SCE_CAML_CHAR
, sc
.Forward();
243 else if (sc
.Match('"'))
244 state2
= SCE_CAML_STRING
;
245 else if (sc
.Match('(', '*'))
246 state2
= SCE_CAML_COMMENT
, sc
.Forward(), sc
.ch
= ' '; // (*)...
247 else if (strchr("!?~" /* Caml "prefix-symbol" */
248 "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
249 "()[]{};,:.#", sc
.ch
) // Caml "bracket" or ;,:.#
250 // SML "extra" ident chars
251 || (isSML
&& (sc
.Match('\\') || sc
.Match('`'))))
252 state2
= SCE_CAML_OPERATOR
;
255 case SCE_CAML_IDENTIFIER
:
256 // [try to] interpret as [additional] identifier char
257 if (!(iscaml(sc
.ch
) || sc
.Match('\''))) {
258 const Sci_Position n
= sc
.currentPos
- chToken
;
260 // length is believable as keyword, [re-]construct token
262 for (Sci_Position i
= -n
; i
< 0; i
++)
263 t
[n
+ i
] = static_cast<char>(sc
.GetRelative(i
));
265 // special-case "_" token as KEYWORD
266 if ((n
== 1 && sc
.chPrev
== '_') || keywords
.InList(t
))
267 sc
.ChangeState(SCE_CAML_KEYWORD
);
268 else if (keywords2
.InList(t
))
269 sc
.ChangeState(SCE_CAML_KEYWORD2
);
270 else if (keywords3
.InList(t
))
271 sc
.ChangeState(SCE_CAML_KEYWORD3
);
273 state2
= SCE_CAML_DEFAULT
, advance
= false;
277 case SCE_CAML_TAGNAME
:
278 // [try to] interpret as [additional] tagname char
279 if (!(iscaml(sc
.ch
) || sc
.Match('\'')))
280 state2
= SCE_CAML_DEFAULT
, advance
= false;
283 /*case SCE_CAML_KEYWORD:
284 case SCE_CAML_KEYWORD2:
285 case SCE_CAML_KEYWORD3:
286 // [try to] interpret as [additional] keyword char
288 state2 = SCE_CAML_DEFAULT, advance = false;
291 case SCE_CAML_LINENUM
:
292 // [try to] interpret as [additional] linenum directive char
294 state2
= SCE_CAML_DEFAULT
, advance
= false;
297 case SCE_CAML_OPERATOR
: {
298 // [try to] interpret as [additional] operator char
300 if (iscaml(sc
.ch
) || isspace(sc
.ch
) // ident or whitespace
301 || (o
= strchr(")]};,\'\"#", sc
.ch
),o
) // "termination" chars
302 || (!isSML
&& sc
.Match('`')) // Caml extra term char
303 || (!strchr("!$%&*+-./:<=>?@^|~", sc
.ch
)// "operator" chars
304 // SML extra ident chars
305 && !(isSML
&& (sc
.Match('\\') || sc
.Match('`'))))) {
306 // check for INCLUSIVE termination
307 if (o
&& strchr(")]};,", sc
.ch
)) {
308 if ((sc
.Match(')') && sc
.chPrev
== '(')
309 || (sc
.Match(']') && sc
.chPrev
== '['))
310 // special-case "()" and "[]" tokens as KEYWORDS
311 sc
.ChangeState(SCE_CAML_KEYWORD
);
315 state2
= SCE_CAML_DEFAULT
;
320 case SCE_CAML_NUMBER
:
321 // [try to] interpret as [additional] numeric literal char
322 if ((!isSML
&& sc
.Match('_')) || IsADigit(sc
.ch
, chBase
))
324 // how about an integer suffix?
325 if (!isSML
&& (sc
.Match('l') || sc
.Match('L') || sc
.Match('n'))
326 && (sc
.chPrev
== '_' || IsADigit(sc
.chPrev
, chBase
)))
328 // or a floating-point literal?
330 // with a decimal point?
332 && ((!isSML
&& sc
.chPrev
== '_')
333 || IsADigit(sc
.chPrev
, chBase
)))
335 // with an exponent? (I)
336 if ((sc
.Match('e') || sc
.Match('E'))
337 && ((!isSML
&& (sc
.chPrev
== '.' || sc
.chPrev
== '_'))
338 || IsADigit(sc
.chPrev
, chBase
)))
340 // with an exponent? (II)
341 if (((!isSML
&& (sc
.Match('+') || sc
.Match('-')))
342 || (isSML
&& sc
.Match('~')))
343 && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E'))
346 // it looks like we have run out of number
347 state2
= SCE_CAML_DEFAULT
, advance
= false;
352 // [try to] interpret as [additional] char literal char
353 if (sc
.Match('\\')) {
354 chLit
= 1; // (definitely IS a char literal)
355 if (sc
.chPrev
== '\\')
356 sc
.ch
= ' '; // (...\\')
357 // should we be terminating - one way or another?
358 } else if ((sc
.Match('\'') && sc
.chPrev
!= '\\')
360 state2
= SCE_CAML_DEFAULT
;
364 sc
.ChangeState(SCE_CAML_IDENTIFIER
);
365 // ... maybe a char literal, maybe not
366 } else if (chLit
< 1 && sc
.currentPos
- chToken
>= 2)
367 sc
.ChangeState(SCE_CAML_IDENTIFIER
), advance
= false;
370 // fall through for SML char literal (handle like string) */
372 case SCE_CAML_STRING
:
373 // [try to] interpret as [additional] [SML char/] string literal char
374 if (isSML
&& sc
.Match('\\') && sc
.chPrev
!= '\\' && isspace(sc
.chNext
))
375 state2
= SCE_CAML_WHITE
;
376 else if (sc
.Match('\\') && sc
.chPrev
== '\\')
377 sc
.ch
= ' '; // (...\\")
378 // should we be terminating - one way or another?
379 else if ((sc
.Match('"') && sc
.chPrev
!= '\\')
380 || (isSML
&& sc
.atLineEnd
)) {
381 state2
= SCE_CAML_DEFAULT
;
388 // [try to] interpret as [additional] SML embedded whitespace char
389 if (sc
.Match('\\')) {
390 // style this puppy NOW...
391 state2
= SCE_CAML_STRING
, sc
.ch
= ' ' /* (...\") */, chColor
++,
392 styler
.ColourTo(chColor
, SCE_CAML_WHITE
), styler
.Flush();
393 // ... then backtrack to determine original SML literal type
394 Sci_Position p
= chColor
- 2;
395 for (; p
>= 0 && styler
.StyleAt(p
) == SCE_CAML_WHITE
; p
--) ;
397 state2
= static_cast<int>(styler
.StyleAt(p
));
398 // take care of state change NOW
399 sc
.ChangeState(state2
), state2
= -1;
403 case SCE_CAML_COMMENT
:
404 case SCE_CAML_COMMENT1
:
405 case SCE_CAML_COMMENT2
:
406 case SCE_CAML_COMMENT3
:
407 // we're IN a comment - does this start a NESTED comment?
408 if (sc
.Match('(', '*'))
409 state2
= sc
.state
+ 1, chToken
= sc
.currentPos
,
410 sc
.Forward(), sc
.ch
= ' ' /* (*)... */, nesting
++;
411 // [try to] interpret as [additional] comment char
412 else if (sc
.Match(')') && sc
.chPrev
== '*') {
414 state2
= (sc
.state
& 0x0f) - 1, chToken
= 0, nesting
--;
416 state2
= SCE_CAML_DEFAULT
;
418 // enable "magic" (read-only) comment AS REQUIRED
419 } else if (useMagic
&& sc
.currentPos
- chToken
== 4
420 && sc
.Match('c') && sc
.chPrev
== 'r' && sc
.GetRelative(-2) == '@')
421 sc
.state
|= 0x10; // (switch to read-only comment style)
425 // handle state change and char coloring AS REQUIRED
427 styler
.ColourTo(chColor
, sc
.state
), sc
.ChangeState(state2
);
428 // move to next char UNLESS re-scanning current char
433 // do any required terminal char coloring (JIC)
437 #ifdef BUILD_AS_EXTERNAL_LEXER
439 #endif /* BUILD_AS_EXTERNAL_LEXER */
441 Sci_PositionU
, Sci_Position
,
448 static const char * const camlWordListDesc
[] = {
449 "Keywords", // primary Objective Caml keywords
450 "Keywords2", // "optional" keywords (typically from Pervasives)
451 "Keywords3", // "optional" keywords (typically typenames)
455 #ifndef BUILD_AS_EXTERNAL_LEXER
456 LexerModule
lmCaml(SCLEX_CAML
, ColouriseCamlDoc
, "caml", FoldCamlDoc
, camlWordListDesc
);
457 #endif /* BUILD_AS_EXTERNAL_LEXER */