1 // Scintilla source code edit control
3 ** Lexer for Objective Caml.
5 // Copyright 2005-2009 by Robert Roessler <robertr@rftp.com>
6 // The License.txt file describes the conditions under which this software may be distributed.
8 20050204 Initial release.
9 20050205 Quick compiler standards/"cleanliness" adjustment.
10 20050206 Added cast for IsLeadByte().
11 20050209 Changes to "external" build support.
12 20050306 Fix for 1st-char-in-doc "corner" case.
13 20050502 Fix for [harmless] one-past-the-end coloring.
14 20050515 Refined numeric token recognition logic.
15 20051125 Added 2nd "optional" keywords class.
16 20051129 Support "magic" (read-only) comments for RCaml.
17 20051204 Swtich to using StyleContext infrastructure.
18 20090629 Add full Standard ML '97 support.
30 #include "PropSetSimple.h"
32 #include "StyleContext.h"
34 #include "Scintilla.h"
37 // Since the Microsoft __iscsym[f] funcs are not ANSI...
38 inline int iscaml(int c
) {return isalnum(c
) || c
== '_';}
39 inline int iscamlf(int c
) {return isalpha(c
) || c
== '_';}
41 static const int baseT
[24] = {
42 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
43 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */
47 using namespace Scintilla
;
50 #ifdef BUILD_AS_EXTERNAL_LEXER
52 (actually seems to work!)
54 #include "WindowAccessor.h"
55 #include "ExternalLexer.h"
61 static void ColouriseCamlDoc(
62 unsigned int startPos
, int length
,
64 WordList
*keywordlists
[],
67 static void FoldCamlDoc(
68 unsigned int startPos
, int length
,
70 WordList
*keywordlists
[],
73 static void InternalLexOrFold(int lexOrFold
, unsigned int startPos
, int length
,
74 int initStyle
, char *words
[], WindowID window
, char *props
);
76 static const char* LexerName
= "caml";
79 void Platform::DebugPrintf(const char *format
, ...) {
82 va_start(pArguments
, format
);
83 vsprintf(buffer
,format
,pArguments
);
85 Platform::DebugDisplay(buffer
);
88 void Platform::DebugPrintf(const char *, ...) {
92 bool Platform::IsDBCSLeadByte(int codePage
, char ch
) {
93 return ::IsDBCSLeadByteEx(codePage
, ch
) != 0;
96 long Platform::SendScintilla(WindowID w
, unsigned int msg
, unsigned long wParam
, long lParam
) {
97 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
, lParam
);
100 long Platform::SendScintillaPointer(WindowID w
, unsigned int msg
, unsigned long wParam
, void *lParam
) {
101 return ::SendMessage(reinterpret_cast<HWND
>(w
), msg
, wParam
,
102 reinterpret_cast<LPARAM
>(lParam
));
105 void EXT_LEXER_DECL
Fold(unsigned int lexer
, unsigned int startPos
, int length
,
106 int initStyle
, char *words
[], WindowID window
, char *props
)
108 // below useless evaluation(s) to supress "not used" warnings
110 // build expected data structures and do the Fold
111 InternalLexOrFold(1, startPos
, length
, initStyle
, words
, window
, props
);
115 int EXT_LEXER_DECL
GetLexerCount()
117 return 1; // just us [Objective] Caml lexers here!
120 void EXT_LEXER_DECL
GetLexerName(unsigned int Index
, char *name
, int buflength
)
122 // below useless evaluation(s) to supress "not used" warnings
124 // return as much of our lexer name as will fit (what's up with Index?)
127 int n
= strlen(LexerName
);
130 memcpy(name
, LexerName
, n
), name
[n
] = '\0';
134 void EXT_LEXER_DECL
Lex(unsigned int lexer
, unsigned int startPos
, int length
,
135 int initStyle
, char *words
[], WindowID window
, char *props
)
137 // below useless evaluation(s) to supress "not used" warnings
139 // build expected data structures and do the Lex
140 InternalLexOrFold(0, startPos
, length
, initStyle
, words
, window
, props
);
143 static void InternalLexOrFold(int foldOrLex
, unsigned int startPos
, int length
,
144 int initStyle
, char *words
[], WindowID window
, char *props
)
146 // create and initialize a WindowAccessor (including contained PropSet)
148 ps
.SetMultiple(props
);
149 WindowAccessor
wa(window
, ps
);
150 // create and initialize WordList(s)
152 for (; words
[nWL
]; nWL
++) ; // count # of WordList PTRs needed
153 WordList
** wl
= new WordList
* [nWL
+ 1];// alloc WordList PTRs
155 for (; i
< nWL
; i
++) {
156 wl
[i
] = new WordList(); // (works or THROWS bad_alloc EXCEPTION)
157 wl
[i
]->Set(words
[i
]);
160 // call our "internal" folder/lexer (... then do Flush!)
162 FoldCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
164 ColouriseCamlDoc(startPos
, length
, initStyle
, wl
, wa
);
166 // clean up before leaving
167 for (i
= nWL
- 1; i
>= 0; i
--)
173 #endif /* BUILD_AS_EXTERNAL_LEXER */
175 void ColouriseCamlDoc(
176 unsigned int startPos
, int length
,
178 WordList
*keywordlists
[],
182 StyleContext
sc(startPos
, length
, initStyle
, styler
);
184 int chBase
= 0, chToken
= 0, chLit
= 0;
185 WordList
& keywords
= *keywordlists
[0];
186 WordList
& keywords2
= *keywordlists
[1];
187 WordList
& keywords3
= *keywordlists
[2];
188 const bool isSML
= keywords
.InList("andalso");
189 const int useMagic
= styler
.GetPropertyInt("lexer.caml.magic", 0);
191 // set up [initial] state info (terminating states that shouldn't "bleed")
192 const int state_
= sc
.state
& 0x0f;
193 if (state_
<= SCE_CAML_CHAR
194 || (isSML
&& state_
== SCE_CAML_STRING
))
195 sc
.state
= SCE_CAML_DEFAULT
;
196 int nesting
= (state_
>= SCE_CAML_COMMENT
)? (state_
- SCE_CAML_COMMENT
): 0;
198 // foreach char in range...
200 // set up [per-char] state info
201 int state2
= -1; // (ASSUME no state change)
202 int chColor
= sc
.currentPos
- 1;// (ASSUME standard coloring range)
203 bool advance
= true; // (ASSUME scanner "eats" 1 char)
205 // step state machine
206 switch (sc
.state
& 0x0f) {
207 case SCE_CAML_DEFAULT
:
208 chToken
= sc
.currentPos
; // save [possible] token start (JIC)
209 // it's wide open; what do we have?
211 state2
= SCE_CAML_IDENTIFIER
;
212 else if (!isSML
&& sc
.Match('`') && iscamlf(sc
.chNext
))
213 state2
= SCE_CAML_TAGNAME
;
214 else if (!isSML
&& sc
.Match('#') && isdigit(sc
.chNext
))
215 state2
= SCE_CAML_LINENUM
;
216 else if (isdigit(sc
.ch
)) {
217 // it's a number, assume base 10
218 state2
= SCE_CAML_NUMBER
, chBase
= 10;
220 // there MAY be a base specified...
221 const char* baseC
= "bBoOxX";
223 if (sc
.chNext
== 'w')
224 sc
.Forward(); // (consume SML "word" indicator)
227 // ... change to specified base AS REQUIRED
228 if (strchr(baseC
, sc
.chNext
))
229 chBase
= baseT
[tolower(sc
.chNext
) - 'a'], sc
.Forward();
231 } else if (!isSML
&& sc
.Match('\'')) // (Caml char literal?)
232 state2
= SCE_CAML_CHAR
, chLit
= 0;
233 else if (isSML
&& sc
.Match('#', '"')) // (SML char literal?)
234 state2
= SCE_CAML_CHAR
, sc
.Forward();
235 else if (sc
.Match('"'))
236 state2
= SCE_CAML_STRING
;
237 else if (sc
.Match('(', '*'))
238 state2
= SCE_CAML_COMMENT
, sc
.Forward(), sc
.ch
= ' '; // (*)...
239 else if (strchr("!?~" /* Caml "prefix-symbol" */
240 "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
241 "()[]{};,:.#", sc
.ch
) // Caml "bracket" or ;,:.#
242 // SML "extra" ident chars
243 || (isSML
&& (sc
.Match('\\') || sc
.Match('`'))))
244 state2
= SCE_CAML_OPERATOR
;
247 case SCE_CAML_IDENTIFIER
:
248 // [try to] interpret as [additional] identifier char
249 if (!(iscaml(sc
.ch
) || sc
.Match('\''))) {
250 const int n
= sc
.currentPos
- chToken
;
252 // length is believable as keyword, [re-]construct token
254 for (int i
= -n
; i
< 0; i
++)
255 t
[n
+ i
] = static_cast<char>(sc
.GetRelative(i
));
257 // special-case "_" token as KEYWORD
258 if ((n
== 1 && sc
.chPrev
== '_') || keywords
.InList(t
))
259 sc
.ChangeState(SCE_CAML_KEYWORD
);
260 else if (keywords2
.InList(t
))
261 sc
.ChangeState(SCE_CAML_KEYWORD2
);
262 else if (keywords3
.InList(t
))
263 sc
.ChangeState(SCE_CAML_KEYWORD3
);
265 state2
= SCE_CAML_DEFAULT
, advance
= false;
269 case SCE_CAML_TAGNAME
:
270 // [try to] interpret as [additional] tagname char
271 if (!(iscaml(sc
.ch
) || sc
.Match('\'')))
272 state2
= SCE_CAML_DEFAULT
, advance
= false;
275 /*case SCE_CAML_KEYWORD:
276 case SCE_CAML_KEYWORD2:
277 case SCE_CAML_KEYWORD3:
278 // [try to] interpret as [additional] keyword char
280 state2 = SCE_CAML_DEFAULT, advance = false;
283 case SCE_CAML_LINENUM
:
284 // [try to] interpret as [additional] linenum directive char
286 state2
= SCE_CAML_DEFAULT
, advance
= false;
289 case SCE_CAML_OPERATOR
: {
290 // [try to] interpret as [additional] operator char
292 if (iscaml(sc
.ch
) || isspace(sc
.ch
) // ident or whitespace
293 || (o
= strchr(")]};,\'\"#", sc
.ch
),o
) // "termination" chars
294 || (!isSML
&& sc
.Match('`')) // Caml extra term char
295 || (!strchr("!$%&*+-./:<=>?@^|~", sc
.ch
)// "operator" chars
296 // SML extra ident chars
297 && !(isSML
&& (sc
.Match('\\') || sc
.Match('`'))))) {
298 // check for INCLUSIVE termination
299 if (o
&& strchr(")]};,", sc
.ch
)) {
300 if ((sc
.Match(')') && sc
.chPrev
== '(')
301 || (sc
.Match(']') && sc
.chPrev
== '['))
302 // special-case "()" and "[]" tokens as KEYWORDS
303 sc
.ChangeState(SCE_CAML_KEYWORD
);
307 state2
= SCE_CAML_DEFAULT
;
312 case SCE_CAML_NUMBER
:
313 // [try to] interpret as [additional] numeric literal char
314 if ((!isSML
&& sc
.Match('_')) || IsADigit(sc
.ch
, chBase
))
316 // how about an integer suffix?
317 if (!isSML
&& (sc
.Match('l') || sc
.Match('L') || sc
.Match('n'))
318 && (sc
.chPrev
== '_' || IsADigit(sc
.chPrev
, chBase
)))
320 // or a floating-point literal?
322 // with a decimal point?
324 && ((!isSML
&& sc
.chPrev
== '_')
325 || IsADigit(sc
.chPrev
, chBase
)))
327 // with an exponent? (I)
328 if ((sc
.Match('e') || sc
.Match('E'))
329 && ((!isSML
&& (sc
.chPrev
== '.' || sc
.chPrev
== '_'))
330 || IsADigit(sc
.chPrev
, chBase
)))
332 // with an exponent? (II)
333 if (((!isSML
&& (sc
.Match('+') || sc
.Match('-')))
334 || (isSML
&& sc
.Match('~')))
335 && (sc
.chPrev
== 'e' || sc
.chPrev
== 'E'))
338 // it looks like we have run out of number
339 state2
= SCE_CAML_DEFAULT
, advance
= false;
344 // [try to] interpret as [additional] char literal char
345 if (sc
.Match('\\')) {
346 chLit
= 1; // (definitely IS a char literal)
347 if (sc
.chPrev
== '\\')
348 sc
.ch
= ' '; // (...\\')
349 // should we be terminating - one way or another?
350 } else if ((sc
.Match('\'') && sc
.chPrev
!= '\\')
352 state2
= SCE_CAML_DEFAULT
;
356 sc
.ChangeState(SCE_CAML_IDENTIFIER
);
357 // ... maybe a char literal, maybe not
358 } else if (chLit
< 1 && sc
.currentPos
- chToken
>= 2)
359 sc
.ChangeState(SCE_CAML_IDENTIFIER
), advance
= false;
362 // fall through for SML char literal (handle like string) */
364 case SCE_CAML_STRING
:
365 // [try to] interpret as [additional] [SML char/] string literal char
366 if (isSML
&& sc
.Match('\\') && sc
.chPrev
!= '\\' && isspace(sc
.chNext
))
367 state2
= SCE_CAML_WHITE
;
368 else if (sc
.Match('\\') && sc
.chPrev
== '\\')
369 sc
.ch
= ' '; // (...\\")
370 // should we be terminating - one way or another?
371 else if ((sc
.Match('"') && sc
.chPrev
!= '\\')
372 || (isSML
&& sc
.atLineEnd
)) {
373 state2
= SCE_CAML_DEFAULT
;
380 // [try to] interpret as [additional] SML embedded whitespace char
381 if (sc
.Match('\\')) {
382 // style this puppy NOW...
383 state2
= SCE_CAML_STRING
, sc
.ch
= ' ' /* (...\") */, chColor
++,
384 styler
.ColourTo(chColor
, SCE_CAML_WHITE
), styler
.Flush();
385 // ... then backtrack to determine original SML literal type
387 for (; p
>= 0 && styler
.StyleAt(p
) == SCE_CAML_WHITE
; p
--) ;
389 state2
= static_cast<int>(styler
.StyleAt(p
));
390 // take care of state change NOW
391 sc
.ChangeState(state2
), state2
= -1;
395 case SCE_CAML_COMMENT
:
396 case SCE_CAML_COMMENT1
:
397 case SCE_CAML_COMMENT2
:
398 case SCE_CAML_COMMENT3
:
399 // we're IN a comment - does this start a NESTED comment?
400 if (sc
.Match('(', '*'))
401 state2
= sc
.state
+ 1, chToken
= sc
.currentPos
,
402 sc
.Forward(), sc
.ch
= ' ' /* (*)... */, nesting
++;
403 // [try to] interpret as [additional] comment char
404 else if (sc
.Match(')') && sc
.chPrev
== '*') {
406 state2
= (sc
.state
& 0x0f) - 1, chToken
= 0, nesting
--;
408 state2
= SCE_CAML_DEFAULT
;
410 // enable "magic" (read-only) comment AS REQUIRED
411 } else if (useMagic
&& sc
.currentPos
- chToken
== 4
412 && sc
.Match('c') && sc
.chPrev
== 'r' && sc
.GetRelative(-2) == '@')
413 sc
.state
|= 0x10; // (switch to read-only comment style)
417 // handle state change and char coloring AS REQUIRED
419 styler
.ColourTo(chColor
, sc
.state
), sc
.ChangeState(state2
);
420 // move to next char UNLESS re-scanning current char
425 // do any required terminal char coloring (JIC)
429 #ifdef BUILD_AS_EXTERNAL_LEXER
431 #endif /* BUILD_AS_EXTERNAL_LEXER */
433 unsigned int startPos
, int length
,
435 WordList
*keywordlists
[],
438 // below useless evaluation(s) to supress "not used" warnings
439 startPos
|| length
|| initStyle
|| keywordlists
[0] || styler
.Length();
442 static const char * const camlWordListDesc
[] = {
443 "Keywords", // primary Objective Caml keywords
444 "Keywords2", // "optional" keywords (typically from Pervasives)
445 "Keywords3", // "optional" keywords (typically typenames)
449 #ifndef BUILD_AS_EXTERNAL_LEXER
450 LexerModule
lmCaml(SCLEX_CAML
, ColouriseCamlDoc
, "caml", FoldCamlDoc
, camlWordListDesc
);
451 #endif /* BUILD_AS_EXTERNAL_LEXER */