Merge pull request #11 from esorton/bugfix/add-constexpr-keyword-to-arduino-ctags
[arduino-ctags.git] / ocaml.c
blob8fd68725277551e0ec2b78321b82b963ada2c884
1 /*
2 * Copyright (c) 2009, Vincent Berthoux
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License.
7 * This module contains functions for generating tags for Objective Caml
8 * language files.
9 */
11 * INCLUDE FILES
13 #include "general.h" /* must always come first */
15 #include <string.h>
17 #include "keyword.h"
18 #include "entry.h"
19 #include "options.h"
20 #include "read.h"
21 #include "routines.h"
22 #include "vstring.h"
24 /* To get rid of unused parameter warning in
25 * -Wextra */
26 #ifdef UNUSED
27 #elif defined(__GNUC__)
28 # define UNUSED(x) UNUSED_ ## x __attribute__((unused))
29 #elif defined(__LCLINT__)
30 # define UNUSED(x) /*@unused@*/ x
31 #else
32 # define UNUSED(x) x
33 #endif
34 #define OCAML_MAX_STACK_SIZE 256
36 typedef enum {
37 K_CLASS, /* Ocaml class, relatively rare */
38 K_METHOD, /* class method */
39 K_MODULE, /* Ocaml module OR functor */
40 K_VAR,
41 K_TYPE, /* name of an OCaml type */
42 K_FUNCTION,
43 K_CONSTRUCTOR, /* Constructor of a sum type */
44 K_RECORDFIELD,
45 K_EXCEPTION
46 } ocamlKind;
48 static kindOption OcamlKinds[] = {
49 {TRUE, 'c', "class", "classes"},
50 {TRUE, 'm', "method", "Object's method"},
51 {TRUE, 'M', "module", "Module or functor"},
52 {TRUE, 'v', "var", "Global variable"},
53 {TRUE, 't', "type", "Type name"},
54 {TRUE, 'f', "function", "A function"},
55 {TRUE, 'C', "Constructor", "A constructor"},
56 {TRUE, 'r', "Record field", "A 'structure' field"},
57 {TRUE, 'e', "Exception", "An exception"}
60 typedef enum {
61 OcaKEYWORD_and,
62 OcaKEYWORD_begin,
63 OcaKEYWORD_class,
64 OcaKEYWORD_do,
65 OcaKEYWORD_done,
66 OcaKEYWORD_else,
67 OcaKEYWORD_end,
68 OcaKEYWORD_exception,
69 OcaKEYWORD_for,
70 OcaKEYWORD_functor,
71 OcaKEYWORD_fun,
72 OcaKEYWORD_if,
73 OcaKEYWORD_in,
74 OcaKEYWORD_let,
75 OcaKEYWORD_match,
76 OcaKEYWORD_method,
77 OcaKEYWORD_module,
78 OcaKEYWORD_mutable,
79 OcaKEYWORD_object,
80 OcaKEYWORD_of,
81 OcaKEYWORD_rec,
82 OcaKEYWORD_sig,
83 OcaKEYWORD_struct,
84 OcaKEYWORD_then,
85 OcaKEYWORD_try,
86 OcaKEYWORD_type,
87 OcaKEYWORD_val,
88 OcaKEYWORD_virtual,
89 OcaKEYWORD_while,
90 OcaKEYWORD_with,
92 OcaIDENTIFIER,
93 Tok_PARL, /* '(' */
94 Tok_PARR, /* ')' */
95 Tok_BRL, /* '[' */
96 Tok_BRR, /* ']' */
97 Tok_CurlL, /* '{' */
98 Tok_CurlR, /* '}' */
99 Tok_Prime, /* '\'' */
100 Tok_Pipe, /* '|' */
101 Tok_EQ, /* '=' */
102 Tok_Val, /* string/number/poo */
103 Tok_Op, /* any operator recognized by the language */
104 Tok_semi, /* ';' */
105 Tok_comma, /* ',' */
106 Tok_To, /* '->' */
107 Tok_Sharp, /* '#' */
108 Tok_Backslash, /* '\\' */
110 Tok_EOF /* END of file */
111 } ocamlKeyword;
113 typedef struct sOcaKeywordDesc {
114 const char *name;
115 ocamlKeyword id;
116 } ocaKeywordDesc;
118 typedef ocamlKeyword ocaToken;
120 static const ocaKeywordDesc OcamlKeywordTable[] = {
121 { "and" , OcaKEYWORD_and },
122 { "begin" , OcaKEYWORD_begin },
123 { "class" , OcaKEYWORD_class },
124 { "do" , OcaKEYWORD_do },
125 { "done" , OcaKEYWORD_done },
126 { "else" , OcaKEYWORD_else },
127 { "end" , OcaKEYWORD_end },
128 { "exception" , OcaKEYWORD_exception },
129 { "for" , OcaKEYWORD_for },
130 { "fun" , OcaKEYWORD_fun },
131 { "function" , OcaKEYWORD_fun },
132 { "functor" , OcaKEYWORD_functor },
133 { "in" , OcaKEYWORD_in },
134 { "let" , OcaKEYWORD_let },
135 { "match" , OcaKEYWORD_match },
136 { "method" , OcaKEYWORD_method },
137 { "module" , OcaKEYWORD_module },
138 { "mutable" , OcaKEYWORD_mutable },
139 { "object" , OcaKEYWORD_object },
140 { "of" , OcaKEYWORD_of },
141 { "rec" , OcaKEYWORD_rec },
142 { "sig" , OcaKEYWORD_sig },
143 { "struct" , OcaKEYWORD_struct },
144 { "then" , OcaKEYWORD_then },
145 { "try" , OcaKEYWORD_try },
146 { "type" , OcaKEYWORD_type },
147 { "val" , OcaKEYWORD_val },
148 { "value" , OcaKEYWORD_let }, /* just to handle revised syntax */
149 { "virtual" , OcaKEYWORD_virtual },
150 { "while" , OcaKEYWORD_while },
151 { "with" , OcaKEYWORD_with },
153 { "or" , Tok_Op },
154 { "mod " , Tok_Op },
155 { "land " , Tok_Op },
156 { "lor " , Tok_Op },
157 { "lxor " , Tok_Op },
158 { "lsl " , Tok_Op },
159 { "lsr " , Tok_Op },
160 { "asr" , Tok_Op },
161 { "->" , Tok_To },
162 { "true" , Tok_Val },
163 { "false" , Tok_Val }
166 static langType Lang_Ocaml;
168 boolean exportLocalInfo = FALSE;
170 /*//////////////////////////////////////////////////////////////////
171 //// lexingInit */
172 typedef struct _lexingState {
173 vString *name; /* current parsed identifier/operator */
174 const unsigned char *cp; /* position in stream */
175 } lexingState;
177 /* array of the size of all possible value for a char */
178 boolean isOperator[1 << (8 * sizeof (char))] = { FALSE };
180 static void initKeywordHash ( void )
182 const size_t count = sizeof (OcamlKeywordTable) / sizeof (ocaKeywordDesc);
183 size_t i;
185 for (i = 0; i < count; ++i)
187 addKeyword (OcamlKeywordTable[i].name, Lang_Ocaml,
188 (int) OcamlKeywordTable[i].id);
192 /* definition of all the operator in OCaml,
193 * /!\ certain operator get special treatment
194 * in regards of their role in OCaml grammar :
195 * '|' ':' '=' '~' and '?' */
196 static void initOperatorTable ( void )
198 isOperator['!'] = TRUE;
199 isOperator['$'] = TRUE;
200 isOperator['%'] = TRUE;
201 isOperator['&'] = TRUE;
202 isOperator['*'] = TRUE;
203 isOperator['+'] = TRUE;
204 isOperator['-'] = TRUE;
205 isOperator['.'] = TRUE;
206 isOperator['/'] = TRUE;
207 isOperator[':'] = TRUE;
208 isOperator['<'] = TRUE;
209 isOperator['='] = TRUE;
210 isOperator['>'] = TRUE;
211 isOperator['?'] = TRUE;
212 isOperator['@'] = TRUE;
213 isOperator['^'] = TRUE;
214 isOperator['~'] = TRUE;
215 isOperator['|'] = TRUE;
218 /*//////////////////////////////////////////////////////////////////////
219 //// Lexing */
220 static boolean isNum (char c)
222 return c >= '0' && c <= '9';
224 static boolean isLowerAlpha (char c)
226 return c >= 'a' && c <= 'z';
229 static boolean isUpperAlpha (char c)
231 return c >= 'A' && c <= 'Z';
234 static boolean isAlpha (char c)
236 return isLowerAlpha (c) || isUpperAlpha (c);
239 static boolean isIdent (char c)
241 return isNum (c) || isAlpha (c) || c == '_' || c == '\'';
244 static boolean isSpace (char c)
246 return c == ' ' || c == '\t' || c == '\r' || c == '\n';
249 static void eatWhiteSpace (lexingState * st)
251 const unsigned char *cp = st->cp;
252 while (isSpace (*cp))
253 cp++;
255 st->cp = cp;
258 static void eatString (lexingState * st)
260 boolean lastIsBackSlash = FALSE;
261 boolean unfinished = TRUE;
262 const unsigned char *c = st->cp + 1;
264 while (unfinished)
266 /* end of line should never happen.
267 * we tolerate it */
268 if (c == NULL || c[0] == '\0')
269 break;
270 else if (*c == '"' && !lastIsBackSlash)
271 unfinished = FALSE;
272 else
273 lastIsBackSlash = *c == '\\';
275 c++;
278 st->cp = c;
281 static void eatComment (lexingState * st)
283 boolean unfinished = TRUE;
284 boolean lastIsStar = FALSE;
285 const unsigned char *c = st->cp + 2;
287 while (unfinished)
289 /* we've reached the end of the line..
290 * so we have to reload a line... */
291 if (c == NULL || *c == '\0')
293 st->cp = fileReadLine ();
294 /* WOOPS... no more input...
295 * we return, next lexing read
296 * will be null and ok */
297 if (st->cp == NULL)
298 return;
299 c = st->cp;
300 continue;
302 /* we've reached the end of the comment */
303 else if (*c == ')' && lastIsStar)
304 unfinished = FALSE;
305 /* here we deal with imbricated comment, which
306 * are allowed in OCaml */
307 else if (c[0] == '(' && c[1] == '*')
309 st->cp = c;
310 eatComment (st);
311 c = st->cp;
312 lastIsStar = FALSE;
314 else
315 lastIsStar = '*' == *c;
317 c++;
320 st->cp = c;
323 static void readIdentifier (lexingState * st)
325 const unsigned char *p;
326 vStringClear (st->name);
328 /* first char is a simple letter */
329 if (isAlpha (*st->cp) || *st->cp == '_')
330 vStringPut (st->name, (int) *st->cp);
332 /* Go till you get identifier chars */
333 for (p = st->cp + 1; isIdent (*p); p++)
334 vStringPut (st->name, (int) *p);
336 st->cp = p;
338 vStringTerminate (st->name);
341 static ocamlKeyword eatNumber (lexingState * st)
343 while (isNum (*st->cp))
344 st->cp++;
345 return Tok_Val;
348 /* Operator can be defined in OCaml as a function
349 * so we must be ample enough to parse them normally */
350 static ocamlKeyword eatOperator (lexingState * st)
352 int count = 0;
353 const unsigned char *root = st->cp;
355 vStringClear (st->name);
357 while (isOperator[st->cp[count]])
359 vStringPut (st->name, st->cp[count]);
360 count++;
363 vStringTerminate (st->name);
365 st->cp += count;
366 if (count <= 1)
368 switch (root[0])
370 case '|':
371 return Tok_Pipe;
372 case '=':
373 return Tok_EQ;
374 default:
375 return Tok_Op;
378 else if (count == 2 && root[0] == '-' && root[1] == '>')
379 return Tok_To;
380 else
381 return Tok_Op;
384 /* The lexer is in charge of reading the file.
385 * Some of sub-lexer (like eatComment) also read file.
386 * lexing is finished when the lexer return Tok_EOF */
387 static ocamlKeyword lex (lexingState * st)
389 int retType;
390 /* handling data input here */
391 while (st->cp == NULL || st->cp[0] == '\0')
393 st->cp = fileReadLine ();
394 if (st->cp == NULL)
395 return Tok_EOF;
398 if (isAlpha (*st->cp))
400 readIdentifier (st);
401 retType = lookupKeyword (vStringValue (st->name), Lang_Ocaml);
403 if (retType == -1) /* If it's not a keyword */
405 return OcaIDENTIFIER;
407 else
409 return retType;
412 else if (isNum (*st->cp))
413 return eatNumber (st);
414 else if (isSpace (*st->cp))
416 eatWhiteSpace (st);
417 return lex (st);
419 /* OCaml permit the definition of our own operators
420 * so here we check all the consecuting chars which
421 * are operators to discard them. */
422 else if (isOperator[*st->cp])
423 return eatOperator (st);
424 else
425 switch (*st->cp)
427 case '(':
428 if (st->cp[1] == '*') /* ergl, a comment */
430 eatComment (st);
431 return lex (st);
433 else
435 st->cp++;
436 return Tok_PARL;
439 case ')':
440 st->cp++;
441 return Tok_PARR;
442 case '[':
443 st->cp++;
444 return Tok_BRL;
445 case ']':
446 st->cp++;
447 return Tok_BRR;
448 case '{':
449 st->cp++;
450 return Tok_CurlL;
451 case '}':
452 st->cp++;
453 return Tok_CurlR;
454 case '\'':
455 st->cp++;
456 return Tok_Prime;
457 case ',':
458 st->cp++;
459 return Tok_comma;
460 case '=':
461 st->cp++;
462 return Tok_EQ;
463 case ';':
464 st->cp++;
465 return Tok_semi;
466 case '"':
467 eatString (st);
468 return Tok_Val;
469 case '_':
470 st->cp++;
471 return Tok_Val;
472 case '#':
473 st->cp++;
474 return Tok_Sharp;
475 case '\\':
476 st->cp++;
477 return Tok_Backslash;
479 default:
480 st->cp++;
481 break;
484 /* default return if nothing is recognized,
485 * shouldn't happen, but at least, it will
486 * be handled without destroying the parsing. */
487 return Tok_Val;
490 /*//////////////////////////////////////////////////////////////////////
491 //// Parsing */
492 typedef void (*parseNext) (vString * const ident, ocaToken what);
494 /********** Helpers */
495 /* This variable hold the 'parser' which is going to
496 * handle the next token */
497 parseNext toDoNext;
499 /* Special variable used by parser eater to
500 * determine which action to put after their
501 * job is finished. */
502 parseNext comeAfter;
504 /* If a token put an end to current delcaration/
505 * statement */
506 ocaToken terminatingToken;
508 /* Token to be searched by the different
509 * parser eater. */
510 ocaToken waitedToken;
512 /* name of the last class, used for
513 * context stacking. */
514 vString *lastClass;
516 vString *voidName;
518 typedef enum _sContextKind {
519 ContextStrong,
520 ContextSoft
521 } contextKind;
523 typedef enum _sContextType {
524 ContextType,
525 ContextModule,
526 ContextClass,
527 ContextValue,
528 ContextFunction,
529 ContextMethod,
530 ContextBlock
531 } contextType;
533 typedef struct _sOcamlContext {
534 contextKind kind; /* well if the context is strong or not */
535 contextType type;
536 parseNext callback; /* what to do when a context is pop'd */
537 vString *contextName; /* name, if any, of the surrounding context */
538 } ocamlContext;
540 /* context stack, can be used to output scope information
541 * into the tag file. */
542 ocamlContext stack[OCAML_MAX_STACK_SIZE];
543 /* current position in the tag */
544 int stackIndex;
546 /* special function, often recalled, so putting it here */
547 static void globalScope (vString * const ident, ocaToken what);
549 /* Return : index of the last named context if one
550 * is found, -1 otherwise */
551 static int getLastNamedIndex ( void )
553 int i;
555 for (i = stackIndex - 1; i >= 0; --i)
557 if (stack[i].contextName->buffer &&
558 strlen (stack[i].contextName->buffer) > 0)
560 return i;
564 return -1;
567 static const char *contextDescription (contextType t)
569 switch (t)
571 case ContextFunction:
572 return "function";
573 case ContextMethod:
574 return "method";
575 case ContextValue:
576 return "value";
577 case ContextModule:
578 return "Module";
579 case ContextType:
580 return "type";
581 case ContextClass:
582 return "class";
583 case ContextBlock:
584 return "begin/end";
587 return NULL;
590 static char contextTypeSuffix (contextType t)
592 switch (t)
594 case ContextFunction:
595 case ContextMethod:
596 case ContextValue:
597 case ContextModule:
598 return '/';
599 case ContextType:
600 return '.';
601 case ContextClass:
602 return '#';
603 case ContextBlock:
604 return ' ';
607 return '$';
610 /* Push a new context, handle null string */
611 static void pushContext (contextKind kind, contextType type, parseNext after,
612 vString const *contextName)
614 int parentIndex;
616 if (stackIndex >= OCAML_MAX_STACK_SIZE)
618 verbose ("OCaml Maximum depth reached");
619 return;
623 stack[stackIndex].kind = kind;
624 stack[stackIndex].type = type;
625 stack[stackIndex].callback = after;
627 parentIndex = getLastNamedIndex ();
628 if (contextName == NULL)
630 vStringClear (stack[stackIndex++].contextName);
631 return;
634 if (parentIndex >= 0)
636 vStringCopy (stack[stackIndex].contextName,
637 stack[parentIndex].contextName);
638 vStringPut (stack[stackIndex].contextName,
639 contextTypeSuffix (stack[parentIndex].type));
641 vStringCat (stack[stackIndex].contextName, contextName);
643 else
644 vStringCopy (stack[stackIndex].contextName, contextName);
646 stackIndex++;
649 static void pushStrongContext (vString * name, contextType type)
651 pushContext (ContextStrong, type, &globalScope, name);
654 static void pushSoftContext (parseNext continuation,
655 vString * name, contextType type)
657 pushContext (ContextSoft, type, continuation, name);
660 static void pushEmptyContext (parseNext continuation)
662 pushContext (ContextSoft, ContextValue, continuation, NULL);
665 /* unroll the stack until the last named context.
666 * then discard it. Used to handle the :
667 * let f x y = ...
668 * in ...
669 * where the context is reseted after the in. Context may have
670 * been really nested before that. */
671 static void popLastNamed ( void )
673 int i = getLastNamedIndex ();
675 if (i >= 0)
677 stackIndex = i;
678 toDoNext = stack[i].callback;
679 vStringClear (stack[i].contextName);
681 else
683 /* ok, no named context found...
684 * (should not happen). */
685 stackIndex = 0;
686 toDoNext = &globalScope;
690 /* pop a context without regarding it's content
691 * (beside handling empty stack case) */
692 static void popSoftContext ( void )
694 if (stackIndex <= 0)
696 toDoNext = &globalScope;
698 else
700 stackIndex--;
701 toDoNext = stack[stackIndex].callback;
702 vStringClear (stack[stackIndex].contextName);
706 /* Reset everything until the last global space.
707 * a strong context can be :
708 * - module
709 * - class definition
710 * - the initial global space
711 * - a _global_ delcaration (let at global scope or in a module).
712 * Created to exit quickly deeply nested context */
713 static contextType popStrongContext ( void )
715 int i;
717 for (i = stackIndex - 1; i >= 0; --i)
719 if (stack[i].kind == ContextStrong)
721 stackIndex = i;
722 toDoNext = stack[i].callback;
723 vStringClear (stack[i].contextName);
724 return stack[i].type;
727 /* ok, no strong context found... */
728 stackIndex = 0;
729 toDoNext = &globalScope;
730 return -1;
733 /* Ignore everything till waitedToken and jump to comeAfter.
734 * If the "end" keyword is encountered break, doesn't remember
735 * why though. */
736 static void tillToken (vString * const UNUSED (ident), ocaToken what)
738 if (what == waitedToken)
739 toDoNext = comeAfter;
740 else if (what == OcaKEYWORD_end)
742 popStrongContext ();
743 toDoNext = &globalScope;
747 /* Ignore everything till a waitedToken is seen, but
748 * take care of balanced parentheses/bracket use */
749 static void contextualTillToken (vString * const UNUSED (ident), ocaToken what)
751 static int parentheses = 0;
752 static int bracket = 0;
753 static int curly = 0;
755 switch (what)
757 case Tok_PARL:
758 parentheses--;
759 break;
760 case Tok_PARR:
761 parentheses++;
762 break;
763 case Tok_CurlL:
764 curly--;
765 break;
766 case Tok_CurlR:
767 curly++;
768 break;
769 case Tok_BRL:
770 bracket--;
771 break;
772 case Tok_BRR:
773 bracket++;
774 break;
776 default: /* other token are ignored */
777 break;
780 if (what == waitedToken && parentheses == 0 && bracket == 0 && curly == 0)
781 toDoNext = comeAfter;
783 else if (what == OcaKEYWORD_end)
785 popStrongContext ();
786 toDoNext = &globalScope;
790 /* Wait for waitedToken and jump to comeAfter or let
791 * the globalScope handle declarations */
792 static void tillTokenOrFallback (vString * const ident, ocaToken what)
794 if (what == waitedToken)
795 toDoNext = comeAfter;
796 else
797 globalScope (ident, what);
800 /* ignore token till waitedToken, or give up if find
801 * terminatingToken. Use globalScope to handle new
802 * declarations. */
803 static void tillTokenOrTerminatingOrFallback (vString * const ident,
804 ocaToken what)
806 if (what == waitedToken)
807 toDoNext = comeAfter;
808 else if (what == terminatingToken)
809 toDoNext = globalScope;
810 else
811 globalScope (ident, what);
814 /* ignore the next token in the stream and jump to the
815 * given comeAfter state */
816 static void ignoreToken (vString * const UNUSED (ident), ocaToken UNUSED (what))
818 toDoNext = comeAfter;
821 /********** Grammar */
822 /* the purpose of each function is detailled near their
823 * implementation */
825 static void killCurrentState ( void )
828 /* Tracking the kind of previous strong
829 * context, if it doesn't match with a
830 * really strong entity, repop */
831 switch (popStrongContext ())
834 case ContextValue:
835 popStrongContext ();
836 break;
837 case ContextFunction:
838 popStrongContext ();
839 break;
840 case ContextMethod:
841 popStrongContext ();
842 break;
844 case ContextType:
845 popStrongContext();
846 break;
847 case ContextBlock:
848 break;
849 case ContextModule:
850 break;
851 case ContextClass:
852 break;
853 default:
854 /* nothing more */
855 break;
859 /* used to prepare tag for OCaml, just in case their is a need to
860 * add additional information to the tag. */
861 static void prepareTag (tagEntryInfo * tag, vString const *name, ocamlKind kind)
863 int parentIndex;
865 initTagEntry (tag, vStringValue (name));
866 tag->kindName = OcamlKinds[kind].name;
867 tag->kind = OcamlKinds[kind].letter;
869 parentIndex = getLastNamedIndex ();
870 if (parentIndex >= 0)
872 tag->extensionFields.scope[0] =
873 contextDescription (stack[parentIndex].type);
874 tag->extensionFields.scope[1] =
875 vStringValue (stack[parentIndex].contextName);
879 /* Used to centralise tag creation, and be able to add
880 * more information to it in the future */
881 static void addTag (vString * const ident, int kind)
883 tagEntryInfo toCreate;
884 prepareTag (&toCreate, ident, kind);
885 makeTagEntry (&toCreate);
888 boolean needStrongPoping = FALSE;
889 static void requestStrongPoping ( void )
891 needStrongPoping = TRUE;
894 static void cleanupPreviousParser ( void )
896 if (needStrongPoping)
898 needStrongPoping = FALSE;
899 popStrongContext ();
903 /* Due to some circular dependencies, the following functions
904 * must be forward-declared. */
905 static void letParam (vString * const ident, ocaToken what);
906 static void localScope (vString * const ident, ocaToken what);
907 static void mayRedeclare (vString * const ident, ocaToken what);
908 static void typeSpecification (vString * const ident, ocaToken what);
911 * Parse a record type
912 * type ident = // parsed previously
914 * ident1: type1;
915 * ident2: type2;
918 static void typeRecord (vString * const ident, ocaToken what)
920 switch (what)
922 case OcaIDENTIFIER:
923 addTag (ident, K_RECORDFIELD);
924 terminatingToken = Tok_CurlR;
925 waitedToken = Tok_semi;
926 comeAfter = &typeRecord;
927 toDoNext = &tillTokenOrTerminatingOrFallback;
928 break;
930 case OcaKEYWORD_mutable:
931 /* ignore it */
932 break;
934 case Tok_CurlR:
935 popStrongContext ();
936 toDoNext = &globalScope;
937 break;
939 default: /* don't care */
940 break;
944 /* handle :
945 * exception ExceptionName ... */
946 static void exceptionDecl (vString * const ident, ocaToken what)
948 if (what == OcaIDENTIFIER)
950 addTag (ident, K_EXCEPTION);
952 /* don't know what to do on else... */
954 toDoNext = &globalScope;
957 tagEntryInfo tempTag;
958 vString *tempIdent;
960 /* Ensure a constructor is not a type path beginning
961 * with a module */
962 static void constructorValidation (vString * const ident, ocaToken what)
964 switch (what)
966 case Tok_Op: /* if we got a '.' which is an operator */
967 toDoNext = &globalScope;
968 popStrongContext ();
969 needStrongPoping = FALSE;
970 break;
972 case OcaKEYWORD_of: /* OK, it must be a constructor :) */
973 makeTagEntry (&tempTag);
974 vStringClear (tempIdent);
975 toDoNext = &tillTokenOrFallback;
976 comeAfter = &typeSpecification;
977 waitedToken = Tok_Pipe;
978 break;
980 case Tok_Pipe: /* OK, it was a constructor :) */
981 makeTagEntry (&tempTag);
982 vStringClear (tempIdent);
983 toDoNext = &typeSpecification;
984 break;
986 default: /* and mean that we're not facing a module name */
987 makeTagEntry (&tempTag);
988 vStringClear (tempIdent);
989 toDoNext = &tillTokenOrFallback;
990 comeAfter = &typeSpecification;
991 waitedToken = Tok_Pipe;
993 /* nothing in the context, discard it */
994 popStrongContext ();
996 /* to be sure we use this token */
997 globalScope (ident, what);
1002 /* Parse beginning of type definition
1003 * type 'avar ident =
1004 * or
1005 * type ('var1, 'var2) ident =
1007 static void typeDecl (vString * const ident, ocaToken what)
1010 switch (what)
1012 /* parameterized */
1013 case Tok_Prime:
1014 comeAfter = &typeDecl;
1015 toDoNext = &ignoreToken;
1016 break;
1017 /* LOTS of parameters */
1018 case Tok_PARL:
1019 comeAfter = &typeDecl;
1020 waitedToken = Tok_PARR;
1021 toDoNext = &tillToken;
1022 break;
1024 case OcaIDENTIFIER:
1025 addTag (ident, K_TYPE);
1026 pushStrongContext (ident, ContextType);
1027 requestStrongPoping ();
1028 waitedToken = Tok_EQ;
1029 comeAfter = &typeSpecification;
1030 toDoNext = &tillTokenOrFallback;
1031 break;
1033 default:
1034 globalScope (ident, what);
1038 /* Parse type of kind
1039 * type bidule = Ctor1 of ...
1040 * | Ctor2
1041 * | Ctor3 of ...
1042 * or
1043 * type bidule = | Ctor1 of ... | Ctor2
1045 * when type bidule = { ... } is detected,
1046 * let typeRecord handle it. */
1047 static void typeSpecification (vString * const ident, ocaToken what)
1050 switch (what)
1052 case OcaIDENTIFIER:
1053 if (isUpperAlpha (ident->buffer[0]))
1055 /* here we handle type aliases of type
1056 * type foo = AnotherModule.bar
1057 * AnotherModule can mistakenly be took
1058 * for a constructor. */
1059 vStringCopy (tempIdent, ident);
1060 prepareTag (&tempTag, tempIdent, K_CONSTRUCTOR);
1061 toDoNext = &constructorValidation;
1063 else
1065 toDoNext = &tillTokenOrFallback;
1066 comeAfter = &typeSpecification;
1067 waitedToken = Tok_Pipe;
1069 break;
1071 case OcaKEYWORD_and:
1072 toDoNext = &typeDecl;
1073 break;
1075 case Tok_BRL: /* the '[' & ']' are ignored to accommodate */
1076 case Tok_BRR: /* with the revised syntax */
1077 case Tok_Pipe:
1078 /* just ignore it */
1079 break;
1081 case Tok_CurlL:
1082 toDoNext = &typeRecord;
1083 break;
1085 default: /* don't care */
1086 break;
1091 static boolean dirtySpecialParam = FALSE;
1094 /* parse the ~label and ~label:type parameter */
1095 static void parseLabel (vString * const ident, ocaToken what)
1097 static int parCount = 0;
1099 switch (what)
1101 case OcaIDENTIFIER:
1102 if (!dirtySpecialParam)
1105 if (exportLocalInfo)
1106 addTag (ident, K_VAR);
1108 dirtySpecialParam = TRUE;
1110 break;
1112 case Tok_PARL:
1113 parCount++;
1114 break;
1116 case Tok_PARR:
1117 parCount--;
1118 if (parCount == 0)
1119 toDoNext = &letParam;
1120 break;
1122 case Tok_Op:
1123 if (ident->buffer[0] == ':')
1125 toDoNext = &ignoreToken;
1126 comeAfter = &letParam;
1128 else if (parCount == 0 && dirtySpecialParam)
1130 toDoNext = &letParam;
1131 letParam (ident, what);
1133 break;
1135 default:
1136 if (parCount == 0 && dirtySpecialParam)
1138 toDoNext = &letParam;
1139 letParam (ident, what);
1141 break;
1146 /* Optional argument with syntax like this :
1147 * ?(foo = value) */
1148 static void parseOptionnal (vString * const ident, ocaToken what)
1150 static int parCount = 0;
1153 switch (what)
1155 case OcaIDENTIFIER:
1156 if (!dirtySpecialParam)
1158 if (exportLocalInfo)
1159 addTag (ident, K_VAR);
1161 dirtySpecialParam = TRUE;
1163 if (parCount == 0)
1164 toDoNext = &letParam;
1166 break;
1168 case Tok_PARL:
1169 parCount++;
1170 break;
1172 case Tok_PARR:
1173 parCount--;
1174 if (parCount == 0)
1175 toDoNext = &letParam;
1176 break;
1178 default: /* don't care */
1179 break;
1184 /** handle let inside functions (so like it's name
1185 * say : local let */
1186 static void localLet (vString * const ident, ocaToken what)
1188 switch (what)
1190 case Tok_PARL:
1191 /* We ignore this token to be able to parse such
1192 * declarations :
1193 * let (ident : type) = ...
1195 break;
1197 case OcaKEYWORD_rec:
1198 /* just ignore to be able to parse such declarations:
1199 * let rec ident = ... */
1200 break;
1202 case Tok_Op:
1203 /* we are defining a new operator, it's a
1204 * function definition */
1205 if (exportLocalInfo)
1206 addTag (ident, K_FUNCTION);
1208 pushSoftContext (mayRedeclare, ident, ContextFunction);
1209 toDoNext = &letParam;
1210 break;
1212 /* Can be a weiiird binding, or an '_' */
1213 case Tok_Val:
1214 if (exportLocalInfo)
1215 addTag (ident, K_VAR);
1216 pushSoftContext (mayRedeclare, ident, ContextValue);
1217 toDoNext = &letParam;
1218 break;
1220 case OcaIDENTIFIER:
1221 if (exportLocalInfo)
1222 addTag (ident, K_VAR);
1223 pushSoftContext (mayRedeclare, ident, ContextValue);
1224 toDoNext = &letParam;
1225 break;
1227 case OcaKEYWORD_end:
1228 popStrongContext ();
1229 break;
1231 default:
1232 toDoNext = &localScope;
1233 break;
1237 /* parse :
1238 * | pattern pattern -> ...
1239 * or
1240 * pattern apttern apttern -> ...
1241 * we ignore all identifiers declared in the pattern,
1242 * because their scope is likely to be even more limited
1243 * than the let definitions.
1244 * Used after a match ... with, or a function ... or fun ...
1245 * because their syntax is similar. */
1246 static void matchPattern (vString * const UNUSED (ident), ocaToken what)
1248 switch (what)
1250 case Tok_To:
1251 pushEmptyContext (&matchPattern);
1252 toDoNext = &mayRedeclare;
1253 break;
1256 case OcaKEYWORD_in:
1257 popLastNamed ();
1258 break;
1260 default:
1261 break;
1265 /* Used at the beginning of a new scope (begin of a
1266 * definition, parenthesis...) to catch inner let
1267 * definition that may be in. */
1268 static void mayRedeclare (vString * const ident, ocaToken what)
1270 switch (what)
1272 case OcaKEYWORD_let:
1273 case OcaKEYWORD_val:
1274 toDoNext = localLet;
1275 break;
1277 case OcaKEYWORD_object:
1278 vStringClear (lastClass);
1279 pushContext (ContextStrong, ContextClass,
1280 &localScope, NULL /*voidName */ );
1281 needStrongPoping = FALSE;
1282 toDoNext = &globalScope;
1283 break;
1285 case OcaKEYWORD_for:
1286 case OcaKEYWORD_while:
1287 toDoNext = &tillToken;
1288 waitedToken = OcaKEYWORD_do;
1289 comeAfter = &mayRedeclare;
1290 break;
1292 case OcaKEYWORD_try:
1293 toDoNext = &mayRedeclare;
1294 pushSoftContext (matchPattern, ident, ContextFunction);
1295 break;
1297 case OcaKEYWORD_fun:
1298 toDoNext = &matchPattern;
1299 break;
1301 /* Handle the special ;; from the OCaml
1302 * Top level */
1303 case Tok_semi:
1304 default:
1305 toDoNext = &localScope;
1306 localScope (ident, what);
1310 /* parse :
1311 * p1 p2 ... pn = ...
1312 * or
1313 * ?(p1=v) p2 ~p3 ~pn:ja ... = ... */
1314 static void letParam (vString * const ident, ocaToken what)
1316 switch (what)
1318 case Tok_EQ:
1319 toDoNext = &mayRedeclare;
1320 break;
1322 case OcaIDENTIFIER:
1323 if (exportLocalInfo)
1324 addTag (ident, K_VAR);
1325 break;
1327 case Tok_Op:
1328 switch (ident->buffer[0])
1330 case ':':
1331 /*popSoftContext(); */
1332 /* we got a type signature */
1333 comeAfter = &mayRedeclare;
1334 toDoNext = &tillTokenOrFallback;
1335 waitedToken = Tok_EQ;
1336 break;
1338 /* parse something like
1339 * ~varname:type
1340 * or
1341 * ~varname
1342 * or
1343 * ~(varname: long type) */
1344 case '~':
1345 toDoNext = &parseLabel;
1346 dirtySpecialParam = FALSE;
1347 break;
1349 /* Optional argument with syntax like this :
1350 * ?(bla = value)
1351 * or
1352 * ?bla */
1353 case '?':
1354 toDoNext = &parseOptionnal;
1355 dirtySpecialParam = FALSE;
1356 break;
1358 default:
1359 break;
1361 break;
1363 default: /* don't care */
1364 break;
1369 /* parse object ...
1370 * used to be sure the class definition is not a type
1371 * alias */
1372 static void classSpecif (vString * const UNUSED (ident), ocaToken what)
1374 switch (what)
1376 case OcaKEYWORD_object:
1377 pushStrongContext (lastClass, ContextClass);
1378 toDoNext = &globalScope;
1379 break;
1381 default:
1382 vStringClear (lastClass);
1383 toDoNext = &globalScope;
1387 /* Handle a method ... class declaration.
1388 * nearly a copy/paste of globalLet. */
1389 static void methodDecl (vString * const ident, ocaToken what)
1391 switch (what)
1393 case Tok_PARL:
1394 /* We ignore this token to be able to parse such
1395 * declarations :
1396 * let (ident : type) = ... */
1397 break;
1399 case OcaKEYWORD_mutable:
1400 case OcaKEYWORD_virtual:
1401 case OcaKEYWORD_rec:
1402 /* just ignore to be able to parse such declarations:
1403 * let rec ident = ... */
1404 break;
1406 case OcaIDENTIFIER:
1407 addTag (ident, K_METHOD);
1408 /* Normal pushing to get good subs */
1409 pushStrongContext (ident, ContextMethod);
1410 /*pushSoftContext( globalScope, ident, ContextMethod ); */
1411 toDoNext = &letParam;
1412 break;
1414 case OcaKEYWORD_end:
1415 popStrongContext ();
1416 break;
1418 default:
1419 toDoNext = &globalScope;
1420 break;
1424 /* name of the last module, used for
1425 * context stacking. */
1426 vString *lastModule;
1429 /* parse
1430 * ... struct (* new global scope *) end
1431 * or
1432 * ... sig (* new global scope *) end
1433 * or
1434 * functor ... -> moduleSpecif
1436 static void moduleSpecif (vString * const ident, ocaToken what)
1438 switch (what)
1440 case OcaKEYWORD_functor:
1441 toDoNext = &contextualTillToken;
1442 waitedToken = Tok_To;
1443 comeAfter = &moduleSpecif;
1444 break;
1446 case OcaKEYWORD_struct:
1447 case OcaKEYWORD_sig:
1448 pushStrongContext (lastModule, ContextModule);
1449 toDoNext = &globalScope;
1450 break;
1452 case Tok_PARL: /* ( */
1453 toDoNext = &contextualTillToken;
1454 comeAfter = &globalScope;
1455 waitedToken = Tok_PARR;
1456 contextualTillToken (ident, what);
1457 break;
1459 default:
1460 vStringClear (lastModule);
1461 toDoNext = &globalScope;
1465 /* parse :
1466 * module name = ...
1467 * then pass the token stream to moduleSpecif */
1468 static void moduleDecl (vString * const ident, ocaToken what)
1470 switch (what)
1472 case OcaKEYWORD_type:
1473 /* just ignore it, name come after */
1474 break;
1476 case OcaIDENTIFIER:
1477 addTag (ident, K_MODULE);
1478 vStringCopy (lastModule, ident);
1479 waitedToken = Tok_EQ;
1480 comeAfter = &moduleSpecif;
1481 toDoNext = &contextualTillToken;
1482 break;
1484 default: /* don't care */
1485 break;
1489 /* parse :
1490 * class name = ...
1491 * or
1492 * class virtual ['a,'b] classname = ... */
1493 static void classDecl (vString * const ident, ocaToken what)
1495 switch (what)
1497 case OcaIDENTIFIER:
1498 addTag (ident, K_CLASS);
1499 vStringCopy (lastClass, ident);
1500 toDoNext = &contextualTillToken;
1501 waitedToken = Tok_EQ;
1502 comeAfter = &classSpecif;
1503 break;
1505 case Tok_BRL:
1506 toDoNext = &tillToken;
1507 waitedToken = Tok_BRR;
1508 comeAfter = &classDecl;
1509 break;
1511 default:
1512 break;
1516 /* Handle a global
1517 * let ident ...
1518 * or
1519 * let rec ident ... */
1520 static void globalLet (vString * const ident, ocaToken what)
1522 switch (what)
1524 case Tok_PARL:
1525 /* We ignore this token to be able to parse such
1526 * declarations :
1527 * let (ident : type) = ...
1529 break;
1531 case OcaKEYWORD_mutable:
1532 case OcaKEYWORD_virtual:
1533 case OcaKEYWORD_rec:
1534 /* just ignore to be able to parse such declarations:
1535 * let rec ident = ... */
1536 break;
1538 case Tok_Op:
1539 /* we are defining a new operator, it's a
1540 * function definition */
1541 addTag (ident, K_FUNCTION);
1542 pushStrongContext (ident, ContextFunction);
1543 toDoNext = &letParam;
1544 break;
1546 case OcaIDENTIFIER:
1547 addTag (ident, K_VAR);
1548 pushStrongContext (ident, ContextValue);
1549 requestStrongPoping ();
1550 toDoNext = &letParam;
1551 break;
1553 case OcaKEYWORD_end:
1554 popStrongContext ();
1555 break;
1557 default:
1558 toDoNext = &globalScope;
1559 break;
1563 /* Handle the "strong" top levels, all 'big' declarations
1564 * happen here */
1565 static void globalScope (vString * const UNUSED (ident), ocaToken what)
1567 /* Do not touch, this is used only by the global scope
1568 * to handle an 'and' */
1569 static parseNext previousParser = NULL;
1571 switch (what)
1573 case OcaKEYWORD_and:
1574 cleanupPreviousParser ();
1575 toDoNext = previousParser;
1576 break;
1578 case OcaKEYWORD_type:
1579 cleanupPreviousParser ();
1580 toDoNext = &typeDecl;
1581 previousParser = &typeDecl;
1582 break;
1584 case OcaKEYWORD_class:
1585 cleanupPreviousParser ();
1586 toDoNext = &classDecl;
1587 previousParser = &classDecl;
1588 break;
1590 case OcaKEYWORD_module:
1591 cleanupPreviousParser ();
1592 toDoNext = &moduleDecl;
1593 previousParser = &moduleDecl;
1594 break;
1596 case OcaKEYWORD_end:
1597 needStrongPoping = FALSE;
1598 killCurrentState ();
1599 /*popStrongContext(); */
1600 break;
1602 case OcaKEYWORD_method:
1603 cleanupPreviousParser ();
1604 toDoNext = &methodDecl;
1605 /* and is not allowed in methods */
1606 break;
1608 /* val is mixed with let as global
1609 * to be able to handle mli & new syntax */
1610 case OcaKEYWORD_val:
1611 case OcaKEYWORD_let:
1612 cleanupPreviousParser ();
1613 toDoNext = &globalLet;
1614 previousParser = &globalLet;
1615 break;
1617 case OcaKEYWORD_exception:
1618 cleanupPreviousParser ();
1619 toDoNext = &exceptionDecl;
1620 previousParser = NULL;
1621 break;
1623 /* must be a #line directive, discard the
1624 * whole line. */
1625 case Tok_Sharp:
1626 /* ignore */
1627 break;
1629 default:
1630 /* we don't care */
1631 break;
1635 /* Parse expression. Well ignore it is more the case,
1636 * ignore all tokens except "shocking" keywords */
1637 static void localScope (vString * const ident, ocaToken what)
1639 switch (what)
1641 case Tok_Pipe:
1642 case Tok_PARR:
1643 case Tok_BRR:
1644 case Tok_CurlR:
1645 popSoftContext ();
1646 break;
1648 /* Everything that `begin` has an `end`
1649 * as end is overloaded and signal many end
1650 * of things, we add an empty strong context to
1651 * avoid problem with the end.
1653 case OcaKEYWORD_begin:
1654 pushContext (ContextStrong, ContextBlock, &mayRedeclare, NULL);
1655 toDoNext = &mayRedeclare;
1656 break;
1658 case OcaKEYWORD_in:
1659 popLastNamed ();
1660 break;
1662 /* Ok, we got a '{', which is much likely to create
1663 * a record. We cannot treat it like other [ && (,
1664 * because it may contain the 'with' keyword and screw
1665 * everything else. */
1666 case Tok_CurlL:
1667 toDoNext = &contextualTillToken;
1668 waitedToken = Tok_CurlR;
1669 comeAfter = &localScope;
1670 contextualTillToken (ident, what);
1671 break;
1673 /* Yeah imperative feature of OCaml,
1674 * a ';' like in C */
1675 case Tok_semi:
1676 toDoNext = &mayRedeclare;
1677 break;
1679 case Tok_PARL:
1680 case Tok_BRL:
1681 pushEmptyContext (&localScope);
1682 toDoNext = &mayRedeclare;
1683 break;
1685 case OcaKEYWORD_and:
1686 popLastNamed ();
1687 toDoNext = &localLet;
1688 break;
1690 case OcaKEYWORD_else:
1691 case OcaKEYWORD_then:
1692 popSoftContext ();
1693 pushEmptyContext (&localScope);
1694 toDoNext = &mayRedeclare;
1695 break;
1697 case OcaKEYWORD_if:
1698 pushEmptyContext (&localScope);
1699 toDoNext = &mayRedeclare;
1700 break;
1702 case OcaKEYWORD_match:
1703 pushEmptyContext (&localScope);
1704 toDoNext = &mayRedeclare;
1705 break;
1707 case OcaKEYWORD_with:
1708 popSoftContext ();
1709 toDoNext = &matchPattern;
1710 pushEmptyContext (&matchPattern);
1711 break;
1713 case OcaKEYWORD_end:
1714 killCurrentState ();
1715 break;
1718 case OcaKEYWORD_fun:
1719 comeAfter = &mayRedeclare;
1720 toDoNext = &tillToken;
1721 waitedToken = Tok_To;
1722 break;
1724 case OcaKEYWORD_done:
1725 case OcaKEYWORD_val:
1726 /* doesn't care */
1727 break;
1729 default:
1730 requestStrongPoping ();
1731 globalScope (ident, what);
1732 break;
1736 /*////////////////////////////////////////////////////////////////
1737 //// Deal with the system */
1738 /* in OCaml the file name is the module name used in the language
1739 * with it first letter put in upper case */
1740 static void computeModuleName ( void )
1742 /* in Ocaml the file name define a module.
1743 * so we define a module =)
1745 const char *filename = getSourceFileName ();
1746 int beginIndex = 0;
1747 int endIndex = strlen (filename) - 1;
1748 vString *moduleName = vStringNew ();
1750 while (filename[endIndex] != '.' && endIndex > 0)
1751 endIndex--;
1753 /* avoid problem with path in front of filename */
1754 beginIndex = endIndex;
1755 while (beginIndex > 0)
1757 if (filename[beginIndex] == '\\' || filename[beginIndex] == '/')
1759 beginIndex++;
1760 break;
1763 beginIndex--;
1766 vStringNCopyS (moduleName, &filename[beginIndex], endIndex - beginIndex);
1767 vStringTerminate (moduleName);
1769 if (isLowerAlpha (moduleName->buffer[0]))
1770 moduleName->buffer[0] += ('A' - 'a');
1772 makeSimpleTag (moduleName, OcamlKinds, K_MODULE);
1773 vStringDelete (moduleName);
1776 /* Allocate all string of the context stack */
1777 static void initStack ( void )
1779 int i;
1780 for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
1781 stack[i].contextName = vStringNew ();
1784 static void clearStack ( void )
1786 int i;
1787 for (i = 0; i < OCAML_MAX_STACK_SIZE; ++i)
1788 vStringDelete (stack[i].contextName);
1791 static void findOcamlTags (void)
1793 vString *name = vStringNew ();
1794 lexingState st;
1795 ocaToken tok;
1797 computeModuleName ();
1798 initStack ();
1799 tempIdent = vStringNew ();
1800 lastModule = vStringNew ();
1801 lastClass = vStringNew ();
1802 voidName = vStringNew ();
1803 vStringCopyS (voidName, "_");
1805 st.name = vStringNew ();
1806 st.cp = fileReadLine ();
1807 toDoNext = &globalScope;
1808 tok = lex (&st);
1809 while (tok != Tok_EOF)
1811 (*toDoNext) (st.name, tok);
1812 tok = lex (&st);
1815 vStringDelete (name);
1816 vStringDelete (voidName);
1817 vStringDelete (tempIdent);
1818 vStringDelete (lastModule);
1819 vStringDelete (lastClass);
1820 clearStack ();
1823 static void ocamlInitialize (const langType language)
1825 Lang_Ocaml = language;
1827 initOperatorTable ();
1828 initKeywordHash ();
1831 extern parserDefinition *OcamlParser (void)
1833 static const char *const extensions[] = { "ml", "mli", NULL };
1834 parserDefinition *def = parserNew ("OCaml");
1835 def->kinds = OcamlKinds;
1836 def->kindCount = KIND_COUNT (OcamlKinds);
1837 def->extensions = extensions;
1838 def->parser = findOcamlTags;
1839 def->initialize = ocamlInitialize;
1841 return def;