Add a UI to enable/disable specific overlay handlers.
[TortoiseGit.git] / ext / scintilla / src / LexCaml.cxx
blob845ac657e148ca09b3dcba039853a8a9a191f1a2
1 // Scintilla source code edit control
2 /** @file LexCaml.cxx
3 ** Lexer for Objective Caml.
4 **/
5 // Copyright 2005 by Robert Roessler <robertr@rftp.com>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 /* Release History
8 20050204 Initial release.
9 20050205 Quick compiler standards/"cleanliness" adjustment.
10 20050206 Added cast for IsLeadByte().
11 20050209 Changes to "external" build support.
12 20050306 Fix for 1st-char-in-doc "corner" case.
13 20050502 Fix for [harmless] one-past-the-end coloring.
14 20050515 Refined numeric token recognition logic.
15 20051125 Added 2nd "optional" keywords class.
16 20051129 Support "magic" (read-only) comments for RCaml.
17 20051204 Switch to using StyleContext infrastructure.
20 #include <stdlib.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include <stdio.h>
24 #include <stdarg.h>
26 #include "Platform.h"
28 #include "PropSet.h"
29 #include "Accessor.h"
30 #include "StyleContext.h"
31 #include "KeyWords.h"
32 #include "Scintilla.h"
33 #include "SciLexer.h"
35 // Since the Microsoft __iscsym[f] funcs are not ANSI...
// Non-zero when c may appear inside a Caml identifier: an alphanumeric
// character or '_'.  The <ctype.h> classifiers are only defined for EOF and
// values representable as unsigned char, so anything outside 0..0xFF (negative
// or wider-than-a-byte values) is rejected before isalnum() is consulted.
inline int iscaml(int c) {return c == '_' || (c >= 0 && c <= 0xFF && isalnum(c));}
// Non-zero when c may begin a Caml identifier: a letter or '_'.  Values
// outside 0..0xFF are rejected up front because isalpha() is only defined for
// EOF and values representable as unsigned char.
inline int iscamlf(int c) {return c == '_' || (c >= 0 && c <= 0xFF && isalpha(c));}
// Non-zero when c may appear inside a Caml numeric literal: a decimal digit or
// the '_' separator.  Values outside 0..0xFF are rejected up front because
// isdigit() is only defined for EOF and values representable as unsigned char.
inline int iscamld(int c) {return c == '_' || (c >= 0 && c <= 0xFF && isdigit(c));}
// Numeric base selected by the letter that follows a leading '0' in a literal.
// ColouriseCamlDoc indexes this table with (tolower(letter) - 'a'), so slot 1
// ('b') yields 2, slot 14 ('o') yields 8 and slot 23 ('x') yields 16; every
// other slot is 0.
40 static const int baseT[24] = {
41 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A - L */
42 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0,16 /* M - X */
45 #ifdef SCI_NAMESPACE
46 using namespace Scintilla;
47 #endif
49 #ifdef BUILD_AS_EXTERNAL_LEXER
51 (actually seems to work!)
53 #include "WindowAccessor.h"
54 #include "ExternalLexer.h"
56 #if PLAT_WIN
57 #include <windows.h>
58 #endif
// Forward declarations for the external-lexer build: InternalLexOrFold() is
// defined ahead of the lexer and folder bodies and calls both of them.
60 static void ColouriseCamlDoc(
61 unsigned int startPos, int length,
62 int initStyle,
63 WordList *keywordlists[],
64 Accessor &styler);
66 static void FoldCamlDoc(
67 unsigned int startPos, int length,
68 int initStyle,
69 WordList *keywordlists[],
70 Accessor &styler);
72 static void InternalLexOrFold(int lexOrFold, unsigned int startPos, int length,
73 int initStyle, char *words[], WindowID window, char *props);
// Lexer name that GetLexerName() copies into the caller's buffer.
75 static const char* LexerName = "caml";
77 #ifdef TRACE
78 void Platform::DebugPrintf(const char *format, ...) {
79 char buffer[2000];
80 va_list pArguments;
81 va_start(pArguments, format);
82 vsprintf(buffer,format,pArguments);
83 va_end(pArguments);
84 Platform::DebugDisplay(buffer);
86 #else
87 void Platform::DebugPrintf(const char *, ...) {
89 #endif
91 bool Platform::IsDBCSLeadByte(int codePage, char ch) {
92 return ::IsDBCSLeadByteEx(codePage, ch) != 0;
95 long Platform::SendScintilla(WindowID w, unsigned int msg, unsigned long wParam, long lParam) {
96 return ::SendMessage(reinterpret_cast<HWND>(w), msg, wParam, lParam);
99 long Platform::SendScintillaPointer(WindowID w, unsigned int msg, unsigned long wParam, void *lParam) {
100 return ::SendMessage(reinterpret_cast<HWND>(w), msg, wParam,
101 reinterpret_cast<LPARAM>(lParam));
104 void EXT_LEXER_DECL Fold(unsigned int lexer, unsigned int startPos, int length,
105 int initStyle, char *words[], WindowID window, char *props)
107 // below useless evaluation(s) to supress "not used" warnings
108 lexer;
109 // build expected data structures and do the Fold
110 InternalLexOrFold(1, startPos, length, initStyle, words, window, props);
114 int EXT_LEXER_DECL GetLexerCount()
116 return 1; // just us [Objective] Caml lexers here!
119 void EXT_LEXER_DECL GetLexerName(unsigned int Index, char *name, int buflength)
121 // below useless evaluation(s) to supress "not used" warnings
122 Index;
123 // return as much of our lexer name as will fit (what's up with Index?)
124 if (buflength > 0) {
125 buflength--;
126 int n = strlen(LexerName);
127 if (n > buflength)
128 n = buflength;
129 memcpy(name, LexerName, n), name[n] = '\0';
133 void EXT_LEXER_DECL Lex(unsigned int lexer, unsigned int startPos, int length,
134 int initStyle, char *words[], WindowID window, char *props)
136 // below useless evaluation(s) to supress "not used" warnings
137 lexer;
138 // build expected data structures and do the Lex
139 InternalLexOrFold(0, startPos, length, initStyle, words, window, props);
142 static void InternalLexOrFold(int foldOrLex, unsigned int startPos, int length,
143 int initStyle, char *words[], WindowID window, char *props)
145 // create and initialize a WindowAccessor (including contained PropSet)
146 PropSet ps;
147 ps.SetMultiple(props);
148 WindowAccessor wa(window, ps);
149 // create and initialize WordList(s)
150 int nWL = 0;
151 for (; words[nWL]; nWL++) ; // count # of WordList PTRs needed
152 WordList** wl = new WordList* [nWL + 1];// alloc WordList PTRs
153 int i = 0;
154 for (; i < nWL; i++) {
155 wl[i] = new WordList(); // (works or THROWS bad_alloc EXCEPTION)
156 wl[i]->Set(words[i]);
158 wl[i] = 0;
159 // call our "internal" folder/lexer (... then do Flush!)
160 if (foldOrLex)
161 FoldCamlDoc(startPos, length, initStyle, wl, wa);
162 else
163 ColouriseCamlDoc(startPos, length, initStyle, wl, wa);
164 wa.Flush();
165 // clean up before leaving
166 for (i = nWL - 1; i >= 0; i--)
167 delete wl[i];
168 delete [] wl;
171 static
172 #endif /* BUILD_AS_EXTERNAL_LEXER */
// Styles the range described by startPos/length for Objective Caml.
//   initStyle    - style in effect at startPos; values below SCE_CAML_STRING
//                  are reset to SCE_CAML_DEFAULT, and for comment styles the
//                  nesting depth is recovered from the low four bits
//   keywordlists - entries [0], [1] and [2] are dereferenced and used for
//                  KEYWORD, KEYWORD2 and KEYWORD3 respectively
//   styler       - document accessor; also supplies "lexer.caml.magic"
// The loop is a per-character state machine keyed on the low four bits of
// sc.state.  Each iteration may set state2 (the state to switch to), adjust
// chColor (last position coloured with the current state) and clear advance
// (to re-scan the current character in the new state).
174 void ColouriseCamlDoc(
175 unsigned int startPos, int length,
176 int initStyle,
177 WordList *keywordlists[],
178 Accessor &styler)
180 // initialize styler
181 StyleContext sc(startPos, length, initStyle, styler);
182 // set up [initial] state info (terminating states that shouldn't "bleed")
183 int nesting = 0;
184 if (sc.state < SCE_CAML_STRING)
185 sc.state = SCE_CAML_DEFAULT;
186 if (sc.state >= SCE_CAML_COMMENT)
187 nesting = (sc.state & 0x0f) - SCE_CAML_COMMENT;
// chBase:  numeric base of the number literal being scanned
// chToken: position saved as the start of the current token/comment
// chLit:   set to 1 once a backslash has been seen inside a char literal
189 int chBase = 0, chToken = 0, chLit = 0;
190 WordList& keywords = *keywordlists[0];
191 WordList& keywords2 = *keywordlists[1];
192 WordList& keywords3 = *keywordlists[2];
193 const int useMagic = styler.GetPropertyInt("lexer.caml.magic", 0);
195 // foreach char in range...
196 while (sc.More()) {
197 // set up [per-char] state info
198 int state2 = -1; // (ASSUME no state change)
199 int chColor = sc.currentPos - 1;// (ASSUME standard coloring range)
200 bool advance = true; // (ASSUME scanner "eats" 1 char)
202 // step state machine
203 switch (sc.state & 0x0f) {
204 case SCE_CAML_DEFAULT:
205 chToken = sc.currentPos; // save [possible] token start (JIC)
206 // it's wide open; what do we have?
207 if (iscamlf(sc.ch))
208 state2 = SCE_CAML_IDENTIFIER;
209 else if (sc.Match('`') && iscamlf(sc.chNext))
210 state2 = SCE_CAML_TAGNAME;
211 else if (sc.Match('#') && isdigit(sc.chNext))
212 state2 = SCE_CAML_LINENUM;
213 else if (isdigit(sc.ch)) {
214 state2 = SCE_CAML_NUMBER, chBase = 10;
// NOTE(review): strchr() also matches the terminating NUL, so if chNext can
// ever be 0 the baseT index below would be negative - confirm against
// StyleContext.  The same applies to the other strchr() tests in this function.
215 if (sc.Match('0') && strchr("bBoOxX", sc.chNext))
216 chBase = baseT[tolower(sc.chNext) - 'a'], sc.Forward();
217 } else if (sc.Match('\'')) /* (char literal?) */
218 state2 = SCE_CAML_CHAR, chLit = 0;
219 else if (sc.Match('\"'))
220 state2 = SCE_CAML_STRING;
221 else if (sc.Match('(', '*'))
222 state2 = SCE_CAML_COMMENT,
223 sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment)
224 sc.Forward();
225 else if (strchr("!?~" /* Caml "prefix-symbol" */
226 "=<>@^|&+-*/$%" /* Caml "infix-symbol" */
227 "()[]{};,:.#", sc.ch)) /* Caml "bracket" or ;,:.# */
228 state2 = SCE_CAML_OPERATOR;
229 break;
// Identifier ends at the first char that is neither an identifier char nor a
// quote; tokens shorter than 24 chars are rebuilt and looked up in the three
// keyword lists, then the terminating char is re-scanned in DEFAULT state.
231 case SCE_CAML_IDENTIFIER:
232 // [try to] interpret as [additional] identifier char
233 if (!(iscaml(sc.ch) || sc.Match('\''))) {
234 const int n = sc.currentPos - chToken;
235 if (n < 24) {
236 // length is believable as keyword, [re-]construct token
237 char t[24];
238 for (int i = -n; i < 0; i++)
239 t[n + i] = static_cast<char>(sc.GetRelative(i));
240 t[n] = '\0';
241 // special-case "_" token as KEYWORD
242 if ((n == 1 && sc.chPrev == '_') || keywords.InList(t))
243 sc.ChangeState(SCE_CAML_KEYWORD);
244 else if (keywords2.InList(t))
245 sc.ChangeState(SCE_CAML_KEYWORD2);
246 else if (keywords3.InList(t))
247 sc.ChangeState(SCE_CAML_KEYWORD3);
249 state2 = SCE_CAML_DEFAULT, advance = false;
251 break;
253 case SCE_CAML_TAGNAME:
254 // [try to] interpret as [additional] tagname char
255 if (!(iscaml(sc.ch) || sc.Match('\'')))
256 state2 = SCE_CAML_DEFAULT, advance = false;
257 break;
259 /*case SCE_CAML_KEYWORD:
260 case SCE_CAML_KEYWORD2:
261 case SCE_CAML_KEYWORD3:
262 // [try to] interpret as [additional] keyword char
263 if (!iscaml(ch))
264 state2 = SCE_CAML_DEFAULT, advance = false;
265 break;*/
267 case SCE_CAML_LINENUM:
268 // [try to] interpret as [additional] linenum directive char
269 if (!isdigit(sc.ch))
270 state2 = SCE_CAML_DEFAULT, advance = false;
271 break;
// Operator token ends at an identifier char, whitespace, one of the listed
// "termination" chars, or any non-operator char.  A terminating ) ] } ; or ,
// is coloured as part of the token and consumed; anything else is re-scanned.
273 case SCE_CAML_OPERATOR: {
274 // [try to] interpret as [additional] operator char
275 const char* o = 0;
276 if (iscaml(sc.ch) || isspace(sc.ch) /* ident or whitespace */
277 || (o = strchr(")]};,\'\"`#", sc.ch),o)/* "termination" chars */
278 || !strchr("!$%&*+-./:<=>?@^|~", sc.ch)/* "operator" chars */) {
279 // check for INCLUSIVE termination
280 if (o && strchr(")]};,", sc.ch)) {
281 if ((sc.Match(')') && sc.chPrev == '(')
282 || (sc.Match(']') && sc.chPrev == '['))
283 // special-case "()" and "[]" tokens as KEYWORDS
284 sc.ChangeState(SCE_CAML_KEYWORD);
285 chColor++;
286 } else
287 advance = false;
288 state2 = SCE_CAML_DEFAULT;
290 break;
// Each early "break" below keeps the current char inside the number; falling
// through to the end terminates the literal and re-scans the char.
293 case SCE_CAML_NUMBER:
294 // [try to] interpret as [additional] numeric literal char
295 // N.B. - improperly accepts "extra" digits in base 2 or 8 literals
296 if (iscamld(sc.ch) || IsADigit(sc.ch, chBase))
297 break;
298 // how about an integer suffix?
299 if ((sc.Match('l') || sc.Match('L') || sc.Match('n'))
300 && (iscamld(sc.chPrev) || IsADigit(sc.chPrev, chBase)))
301 break;
302 // or a floating-point literal?
303 if (chBase == 10) {
304 // with a decimal point?
305 if (sc.Match('.') && iscamld(sc.chPrev))
306 break;
307 // with an exponent? (I)
308 if ((sc.Match('e') || sc.Match('E'))
309 && (iscamld(sc.chPrev) || sc.chPrev == '.'))
310 break;
311 // with an exponent? (II)
312 if ((sc.Match('+') || sc.Match('-'))
313 && (sc.chPrev == 'e' || sc.chPrev == 'E'))
314 break;
316 // it looks like we have run out of number
317 state2 = SCE_CAML_DEFAULT, advance = false;
318 break;
// A quote seen in DEFAULT state is only tentatively a char literal: with no
// backslash seen and two or more chars past the token start without a closing
// quote, the token is re-styled as IDENTIFIER and the char re-scanned.  Ending
// at a line end without a closing quote also re-styles it as IDENTIFIER.
320 case SCE_CAML_CHAR:
321 // [try to] interpret as [additional] char literal char
322 if (sc.Match('\\')) {
323 chLit = 1; // (definitely IS a char literal)
324 if (sc.chPrev == '\\')
325 sc.ch = ' '; // (so termination test isn't fooled)
326 // should we be terminating - one way or another?
327 } else if ((sc.Match('\'') && sc.chPrev != '\\') || sc.atLineEnd) {
328 state2 = SCE_CAML_DEFAULT;
329 if (sc.Match('\''))
330 chColor++;
331 else
332 sc.ChangeState(SCE_CAML_IDENTIFIER);
333 // ... maybe a char literal, maybe not
334 } else if (chLit < 1 && sc.currentPos - chToken >= 2)
335 sc.ChangeState(SCE_CAML_IDENTIFIER), advance = false;
336 break;
338 case SCE_CAML_STRING:
339 // [try to] interpret as [additional] string literal char
340 if (sc.Match('\\') && sc.chPrev == '\\')
341 sc.ch = ' '; // (so '\\' doesn't cause us trouble)
342 else if (sc.Match('\"') && sc.chPrev != '\\')
343 state2 = SCE_CAML_DEFAULT, chColor++;
344 break;
// Comments nest: each "(*" moves to the next COMMENTn state and bumps nesting,
// each "*)" steps back one state, or to DEFAULT when nesting is already 0.
// With "lexer.caml.magic" set, a comment whose text begins "(*@rc" has bit
// 0x10 OR-ed into its state.
346 case SCE_CAML_COMMENT:
347 case SCE_CAML_COMMENT1:
348 case SCE_CAML_COMMENT2:
349 case SCE_CAML_COMMENT3:
350 // we're IN a comment - does this start a NESTED comment?
351 if (sc.Match('(', '*'))
352 state2 = sc.state + 1, chToken = sc.currentPos,
353 sc.ch = ' ', // (make SURE "(*)" isn't seen as a closed comment)
354 sc.Forward(), nesting++;
355 // [try to] interpret as [additional] comment char
356 else if (sc.Match(')') && sc.chPrev == '*') {
357 if (nesting)
358 state2 = (sc.state & 0x0f) - 1, chToken = 0, nesting--;
359 else
360 state2 = SCE_CAML_DEFAULT;
361 chColor++;
362 // enable "magic" (read-only) comment AS REQUIRED
363 } else if (useMagic && sc.currentPos - chToken == 4
364 && sc.Match('c') && sc.chPrev == 'r' && sc.GetRelative(-2) == '@')
365 sc.state |= 0x10; // (switch to read-only comment style)
366 break;
// Commit step: colour everything up to chColor with the outgoing state, then
// switch to state2.
369 // handle state change and char coloring as required
370 if (state2 >= 0)
371 styler.ColourTo(chColor, sc.state), sc.ChangeState(state2);
372 // move to next char UNLESS re-scanning current char
373 if (advance)
374 sc.Forward();
377 // do any required terminal char coloring (JIC)
378 sc.Complete();
// Folding hook for the Caml lexer (file-local when built as an external
// lexer).  The visible body sets no fold levels; it only evaluates its
// arguments so that they are not reported as unused.
381 #ifdef BUILD_AS_EXTERNAL_LEXER
382 static
383 #endif /* BUILD_AS_EXTERNAL_LEXER */
384 void FoldCamlDoc(
385 unsigned int startPos, int length,
386 int initStyle,
387 WordList *keywordlists[],
388 Accessor &styler)
390 // below useless evaluation(s) to suppress "not used" warnings
391 startPos || length || initStyle || keywordlists[0] || styler.Length();
// Human-readable descriptions of the keyword lists, passed to the lmCaml
// LexerModule below.  Presumably in the same order as keywordlists[0..2] read
// by ColouriseCamlDoc - confirm.
394 static const char * const camlWordListDesc[] = {
395 "Keywords", // primary Objective Caml keywords
396 "Keywords2", // "optional" keywords (typically from Pervasives)
397 "Keywords3", // "optional" keywords (typically typenames)
// Built-in (non-external) build only: defines the LexerModule object that ties
// SCLEX_CAML and the name "caml" to ColouriseCamlDoc, FoldCamlDoc and the
// keyword-list descriptions.
401 #ifndef BUILD_AS_EXTERNAL_LEXER
402 LexerModule lmCaml(SCLEX_CAML, ColouriseCamlDoc, "caml", FoldCamlDoc, camlWordListDesc);
403 #endif /* BUILD_AS_EXTERNAL_LEXER */