Replace utils_make_human_readable_str() with g_format_size()
[geany-mirror.git] / ctags / parsers / r.c
blobe66b94faedc6b32f4db992892e3a0954ef533521
1 /*
2 * Copyright (c) 2003-2004, Ascher Stefan <stievie@utanet.at>
3 * Copyright (c) 2020, Masatake YAMATO <yamato@redhat.com>
4 * Copyright (c) 2020, Red Hat, Inc.
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
9 * This module contains functions for generating tags for R language files.
10 * R is a programming language for statistical computing.
11 * R is GPL Software, get it from http://www.r-project.org/
13 * The language references are available at
14 * https://cran.r-project.org/manuals.html, and
15 * https://cran.r-project.org/doc/manuals/r-release/R-lang.html
17 * The base library (including library and source functions) release is at
18 * https://stat.ethz.ch/R-manual/R-devel/library/base/html/00Index.html
22 * INCLUDE FILES
24 #include "general.h" /* must always come first */
26 #include "debug.h"
27 #include "entry.h"
28 #include "keyword.h"
29 #include "parse.h"
30 #include "read.h"
31 #include "selectors.h"
32 #include "tokeninfo.h"
33 #include "trace.h"
34 #include "vstring.h"
35 #include "subparser.h"
36 #include "r.h"
38 #include <string.h>
39 #include <ctype.h> /* to define isalpha(), isalnum(), isspace() */
43 * MACROS
45 #ifdef DEBUG
46 #define R_TRACE_TOKEN_TEXT(TXT,T,Q) TRACE_PRINT("<%s> token: %s (%s), parent: %s", \
47 (TXT), \
48 tokenIsTypeVal(T, '\n')? "\\n": tokenString(T), \
49 tokenTypeStr(T->type), \
50 (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
51 #define R_TRACE_TOKEN(T,Q) TRACE_PRINT("token: %s (%s), parent: %s", \
52 tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \
53 tokenTypeStr((T)->type), \
54 (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
56 #define R_TRACE_ENTER() TRACE_ENTER_TEXT("token: %s (%s), parent: %s", \
57 tokenIsTypeVal(token, '\n')? "\\n": tokenString(token), \
58 tokenTypeStr(token->type), \
59 parent == CORK_NIL? "": getEntryInCorkQueue(parent)->name)
60 #define R_TRACE_LEAVE() TRACE_LEAVE()
61 #else
62 #define R_TRACE_TOKEN_TEXT(TXT,T,Q) do {} while (0);
63 #define R_TRACE_TOKEN(T,Q) do {} while (0);
64 #define R_TRACE_ENTER() do {} while (0);
65 #define R_TRACE_LEAVE() do {} while (0);
66 #endif
70 * DATA DEFINITIONS
72 typedef enum {
73 K_UNDEFINED = -1,
74 K_FUNCTION,
75 K_LIBRARY,
76 K_SOURCE,
77 K_GLOBALVAR,
78 K_FUNCVAR,
79 K_PARAM,
80 K_VECTOR,
81 K_LIST,
82 K_DATAFRAME,
83 K_NAMEATTR,
84 KIND_COUNT
85 } rKind;
87 typedef enum {
88 R_LIBRARY_ATTACHED_BY_LIBRARY,
89 R_LIBRARY_ATTACHED_BY_REQUIRE,
90 } rLibraryRole;
92 typedef enum {
93 R_SOURCE_LOADED_BY_SOURCE,
94 } rSourceRole;
96 static roleDefinition RLibraryRoles [] = {
97 { true, "library", "library attached by library function" },
98 { true, "require", "library attached by require function" },
101 static roleDefinition RSourceRoles [] = {
102 { true, "source", "source loaded by source fucntion" },
105 static kindDefinition RKinds[KIND_COUNT] = {
106 {true, 'f', "function", "functions"},
107 {true, 'l', "library", "libraries",
108 .referenceOnly = true, ATTACH_ROLES (RLibraryRoles) },
109 {true, 's', "source", "sources",
110 .referenceOnly = true, ATTACH_ROLES (RSourceRoles) },
111 {true, 'g', "globalVar", "global variables having values other than function()"},
112 {true, 'v', "functionVar", "function variables having values other than function()"},
113 {false,'z', "parameter", "function parameters inside function definitions" },
114 {true, 'c', "vector", "vectors explicitly created with `c()'" },
115 {true, 'L', "list", "lists explicitly created with `list()'" },
116 {true, 'd', "dataframe", "data frame explicitly created with `data.frame()'" },
117 {true, 'n', "nameattr", "names attribtes in vectors, lists, or dataframes" },
120 struct sKindExtraInfo {
121 const char *anon_prefix;
122 const char *ctor;
125 static struct sKindExtraInfo kindExtraInfo[KIND_COUNT] = {
126 [K_FUNCTION] = {
127 "anonFunc",
128 "function",
130 [K_VECTOR] = {
131 "anonVec",
132 "c",
134 [K_LIST] = {
135 "anonList",
136 "list",
138 [K_DATAFRAME] = {
139 "anonDataFrame",
140 "data.frame",
144 typedef enum {
145 F_ASSIGNMENT_OPERATOR,
146 F_CONSTRUCTOR,
147 } rField;
149 static fieldDefinition RFields [] = {
151 .name = "assignmentop",
152 .description = "operator for assignment",
153 .enabled = false,
156 .name = "constructor",
157 .description = "function used for making value assigned to the nameattr tag",
158 .enabled = true,
162 typedef int keywordId; /* to allow KEYWORD_NONE */
164 static const keywordTable RKeywordTable [] = {
165 { "c", KEYWORD_R_C },
166 { "list", KEYWORD_R_LIST },
167 { "data.frame",KEYWORD_R_DATAFRAME },
168 { "function", KEYWORD_R_FUNCTION },
169 { "if", KEYWORD_R_IF },
170 { "else", KEYWORD_R_ELSE },
171 { "for", KEYWORD_R_FOR },
172 { "while", KEYWORD_R_WHILE },
173 { "repeat", KEYWORD_R_REPEAT },
174 { "in", KEYWORD_R_IN },
175 { "next", KEYWORD_R_NEXT },
176 { "break", KEYWORD_R_BREAK },
177 { "TRUE", KEYWORD_R_TRUE, },
178 { "FALSE", KEYWORD_R_FALSE, },
179 { "NULL", KEYWORD_R_NULL, },
180 { "Inf", KEYWORD_R_INF, },
181 { "NaN", KEYWORD_R_NAN, },
182 { "NA", KEYWORD_R_NA, },
183 { "NA_integer_", KEYWORD_R_NA, },
184 { "NA_real_", KEYWORD_R_NA, },
185 { "NA_complex_", KEYWORD_R_NA, },
186 { "NA_character_", KEYWORD_R_NA, },
187 { "source", KEYWORD_R_SOURCE },
188 { "library", KEYWORD_R_LIBRARY },
189 { "require", KEYWORD_R_LIBRARY },
192 #ifdef DEBUG
193 static const char *tokenTypeStr(enum RTokenType e);
194 #endif
196 static struct tokenTypePair typePairs [] = {
197 { '{', '}' },
198 { '[', ']' },
199 { '(', ')' },
202 typedef struct sRToken {
203 tokenInfo base;
204 int scopeIndex;
205 int parenDepth;
206 vString *signature;
207 int kindIndexForParams; /* Used only when gathering parameters */
208 } rToken;
210 #define R(TOKEN) ((rToken *)TOKEN)
212 static int blackHoleIndex;
214 static langType Lang_R;
216 static void readToken (tokenInfo *const token, void *data);
217 static void clearToken (tokenInfo *token);
218 static struct tokenInfoClass rTokenInfoClass = {
219 .nPreAlloc = 4,
220 .typeForUndefined = TOKEN_R_UNDEFINED,
221 .keywordNone = KEYWORD_NONE,
222 .typeForKeyword = TOKEN_R_KEYWORD,
223 .typeForEOF = TOKEN_R_EOF,
224 .extraSpace = sizeof (rToken) - sizeof (tokenInfo),
225 .pairs = typePairs,
226 .pairCount = ARRAY_SIZE (typePairs),
227 .init = NULL,
228 .read = readToken,
229 .clear = clearToken,
230 .copy = NULL,
235 * FUNCTION PROTOTYPES
237 static bool parseStatement (tokenInfo *const token, int parent, bool in_arglist, bool in_continuous_pair);
238 static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall);
240 static int notifyReadRightSideSymbol (tokenInfo *const symbol,
241 const char *const assignmentOperator,
242 int parent,
243 tokenInfo *const token);
244 static int makeSimpleSubparserTag (int langType, tokenInfo *const token, int parent,
245 bool in_func, int kindInR, const char *assignmentOperator);
246 static bool askSubparserTagAcceptancy (tagEntryInfo *pe);
247 static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e);
248 static int notifyReadFuncall (tokenInfo *const func, tokenInfo *const token, int parent);
251 * FUNCTION DEFINITIONS
253 static bool hasKindsOrCtors (tagEntryInfo * e, int kinds[], size_t count)
255 if (e->langType == Lang_R)
257 for (size_t i = 0; i < count; i++)
259 if (e->kindIndex == kinds[i])
260 return true;
263 else
265 bool function = false;
266 for (size_t i = 0; i < count; i++)
268 if (K_FUNCTION == kinds[i])
270 function = true;
271 break;
274 if (function && askSubparserTagHasFunctionAlikeKind (e))
275 return true;
278 const char *tmp = getParserFieldValueForType (e,
279 RFields [F_CONSTRUCTOR].ftype);
280 if (tmp == NULL)
281 return false;
283 for (size_t i = 0; i < count; i++)
285 const char * ctor = kindExtraInfo [kinds[i]].ctor;
286 if (ctor && strcmp (tmp, ctor) == 0)
287 return true;
290 return false;
293 static int searchScopeOtherThan (int scope, int kinds[], size_t count)
297 tagEntryInfo * e = getEntryInCorkQueue (scope);
298 if (!e)
299 return CORK_NIL;
301 if (!hasKindsOrCtors (e, kinds, count))
302 return scope;
304 scope = e->extensionFields.scopeIndex;
306 while (1);
309 static int makeSimpleRTagR (tokenInfo *const token, int parent, int kind,
310 const char * assignmentOp)
312 if (assignmentOp && (strlen (assignmentOp) == 3))
314 /* <<- or ->> is used here. */
315 if (anyKindsEntryInScopeRecursive (parent, tokenString (token),
316 (int[]){K_FUNCTION,
317 K_GLOBALVAR,
318 K_FUNCVAR,
319 K_PARAM}, 4) != CORK_NIL)
320 return CORK_NIL;
322 parent = CORK_NIL;
325 /* If the tag (T) to be created is defined in a scope and
326 the scope already has another tag having the same name
327 as T, T should not be created. */
328 tagEntryInfo *pe = getEntryInCorkQueue (parent);
329 int cousin = CORK_NIL;
330 if (pe && ((pe->langType == Lang_R && pe->kindIndex == K_FUNCTION)
331 || (pe->langType != Lang_R && askSubparserTagHasFunctionAlikeKind (pe))))
333 cousin = anyEntryInScope (parent, tokenString (token));
334 if (kind == K_GLOBALVAR)
335 kind = K_FUNCVAR;
337 else if (pe && (kind == K_GLOBALVAR)
338 && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
340 parent = searchScopeOtherThan (pe->extensionFields.scopeIndex,
341 (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3);
342 if (parent == CORK_NIL)
343 cousin = anyKindEntryInScope (parent, tokenString (token), K_GLOBALVAR);
344 else
346 cousin = anyKindEntryInScope (parent, tokenString (token), K_FUNCVAR);
347 kind = K_FUNCVAR;
350 else if (pe)
352 /* The condition for tagging is a bit relaxed here.
353 Even if the same name tag is created in the scope, a name
354 is tagged if kinds are different. */
355 cousin = anyKindEntryInScope (parent, tokenString (token), kind);
357 if (cousin != CORK_NIL)
358 return CORK_NIL;
360 int corkIndex = makeSimpleTag (token->string, kind);
361 tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
362 if (tag)
364 tag->extensionFields.scopeIndex = parent;
365 if (assignmentOp)
367 if (strlen (assignmentOp) > 0)
368 attachParserField (tag, true,
369 RFields [F_ASSIGNMENT_OPERATOR].ftype,
370 assignmentOp);
371 else
372 markTagExtraBit (tag, XTAG_ANONYMOUS);
374 registerEntry (corkIndex);
376 return corkIndex;
379 static int makeSimpleRTag (tokenInfo *const token, int parent, bool in_func, int kind,
380 const char * assignmentOp)
382 int r;
383 const char *ctor = kindExtraInfo [kind].ctor;
384 tagEntryInfo *pe = (parent == CORK_NIL)? NULL: getEntryInCorkQueue (parent);
386 /* makeTagWithTranslation method for subparsers
387 called from makeSimpleSubparserTag expects
388 kind should be resolved. */
389 if (pe && hasKindsOrCtors (pe, (int[]){K_VECTOR, K_LIST, K_DATAFRAME}, 3))
391 if (assignmentOp
392 && strcmp (assignmentOp, "=") == 0)
393 kind = K_NAMEATTR;
396 bool foreign_tag = false;
397 if (pe == NULL || pe->langType == Lang_R ||
398 !askSubparserTagAcceptancy (pe))
399 r = makeSimpleRTagR (token, parent, kind, assignmentOp);
400 else
402 foreign_tag = true;
403 r = makeSimpleSubparserTag (pe->langType, token, parent, in_func,
404 kind, assignmentOp);
407 if ((kind == K_NAMEATTR || foreign_tag) && ctor)
409 tagEntryInfo *e = getEntryInCorkQueue (r);
410 if (e)
411 attachParserField (e, true,
412 RFields [F_CONSTRUCTOR].ftype,
413 ctor);
416 return r;
419 static void clearToken (tokenInfo *token)
421 R (token)->parenDepth = 0;
422 R (token)->scopeIndex = CORK_NIL;
423 R (token)->kindIndexForParams = KIND_GHOST_INDEX;
424 if (R (token)->signature)
426 vStringDelete (R (token)->signature);
427 R (token)->signature = NULL;
431 static void readString (tokenInfo *const token, void *data)
433 int c;
434 bool escaped = false;
436 int c0 = tokenString(token)[0];
438 while (1)
440 c = getcFromInputFile ();
441 switch (c)
443 case EOF:
444 return;
445 case '\'':
446 case '"':
447 case '`':
448 tokenPutc (token, c);
449 if (!escaped && c == c0)
450 return;
451 escaped = false;
452 break;
453 case '\\':
454 tokenPutc (token, c);
455 escaped = !escaped;
456 break;
457 default:
458 tokenPutc (token, c);
459 escaped = false;
460 break;
465 static void readNumber (tokenInfo *const token, void *data)
467 int c;
469 /* 10.3.1 Constants
471 * Valid numeric constants: 1 10 0.1 .2 1e-7 1.2e+7
472 * Valid integer constants: 1L, 0x10L, 1000000L, 1e6L
473 * Valid numeric constants: 1.1L, 1e-3L, 0x1.1p-2
474 * Valid complex constants: 2i 4.1i 1e-2i
476 while ((c = getcFromInputFile ()))
478 if (isxdigit (c) || c == '.' || c == 'E'
479 || c == '+' || c == '-'
480 || c == 'L' || c == 'x' || c == 'p'
481 || c == 'i')
482 tokenPutc (token, c);
483 else
485 ungetcToInputFile (c);
486 break;
491 static void readSymbol (tokenInfo *const token, void *data)
493 int c;
494 while ((c = getcFromInputFile ()))
496 if (isalnum (c) || c == '.' || c == '_')
497 tokenPutc (token, c);
498 else
500 ungetcToInputFile (c);
501 break;
506 static keywordId resolveKeyword (vString *string)
508 char *s = vStringValue (string);
509 static langType lang = LANG_AUTO;
511 if (lang == LANG_AUTO)
512 lang = getInputLanguage ();
514 return lookupCaseKeyword (s, lang);
517 static bool signatureExpectingParameter (vString *signature)
519 if (vStringLast (signature) == '(')
520 return true;
522 for (size_t i = vStringLength (signature); i > 0; i--)
524 char c = vStringChar (signature, i - 1);
525 if (c == ' ')
526 continue;
527 else if (c == ',')
528 return true;
529 break;
531 return false;
534 static void readToken (tokenInfo *const token, void *data)
536 int c, c0;
538 token->type = TOKEN_R_UNDEFINED;
539 token->keyword = KEYWORD_NONE;
540 vStringClear (token->string);
543 c = getcFromInputFile ();
544 while (c == ' ' || c== '\t' || c == '\f');
546 token->lineNumber = getInputLineNumber ();
547 token->filePosition = getInputFilePosition ();
549 switch (c)
551 case EOF:
552 token->type = TOKEN_R_EOF;
553 break;
554 case '#':
555 while (1)
557 c = getcFromInputFile ();
558 if (c == EOF)
560 token->type = TOKEN_R_EOF;
561 break;
563 else if (c == '\n')
565 token->type = c;
566 tokenPutc (token, c);
567 break;
570 break;
571 case '\n':
572 case ';':
573 token->type = c;
574 tokenPutc (token, c);
575 break;
576 case '\'':
577 case '"':
578 case '`':
579 token->type = TOKEN_R_STRING;
580 tokenPutc (token, c);
581 readString (token, data);
582 break;
583 case '+':
584 case '/':
585 case '^':
586 case '~':
587 token->type = TOKEN_R_OPERATOR;
588 tokenPutc (token, c);
589 break;
590 case ':':
591 token->type = TOKEN_R_OPERATOR;
592 tokenPutc (token, c);
593 c = getcFromInputFile ();
594 if (c == ':')
596 tokenPutc (token, c);
597 token->type = TOKEN_R_SCOPE;
598 c = getcFromInputFile ();
599 if (c == ':')
600 tokenPutc (token, c);
601 else
602 ungetcToInputFile (c);
604 else
605 ungetcToInputFile (c);
606 break;
607 case '&':
608 case '|':
609 case '*':
610 token->type = TOKEN_R_OPERATOR;
611 tokenPutc (token, c);
612 c0 = getcFromInputFile ();
613 if (c == c0)
614 tokenPutc (token, c0);
615 else
616 ungetcToInputFile (c0);
617 break;
618 case '=':
619 token->type = TOKEN_R_OPERATOR;
620 tokenPutc (token, c);
621 c = getcFromInputFile ();
622 if (c == '=')
623 tokenPutc (token, c);
624 else
626 token->type = '=';
627 ungetcToInputFile (c);
629 break;
630 case '-':
631 token->type = TOKEN_R_OPERATOR;
632 tokenPutc (token, c);
633 c = getcFromInputFile ();
634 if (c == '>')
636 token->type = TOKEN_R_RASSIGN;
637 tokenPutc (token, c);
638 c = getcFromInputFile ();
639 if (c == '>')
640 tokenPutc (token, c);
641 else
642 ungetcToInputFile (c);
644 else
645 ungetcToInputFile (c);
646 break;
647 case '>':
648 token->type = TOKEN_R_OPERATOR;
649 tokenPutc (token, c);
650 c = getcFromInputFile ();
651 if (c == '=')
652 tokenPutc (token, c);
653 else
654 ungetcToInputFile (c);
655 break;
656 case '<':
657 token->type = TOKEN_R_OPERATOR;
658 tokenPutc (token, c);
659 c = getcFromInputFile ();
661 /* <<- */
662 if (c == '<')
664 tokenPutc (token, c);
665 c = getcFromInputFile ();
668 if (c == '-')
670 token->type = TOKEN_R_LASSIGN;
671 tokenPutc (token, c);
673 else if (c == '=')
674 tokenPutc (token, c);
675 else
676 ungetcToInputFile (c);
677 break;
678 case '%':
679 token->type = TOKEN_R_OPERATOR;
680 tokenPutc (token, c);
683 c = getcFromInputFile ();
684 if (c == EOF)
685 break;
687 tokenPutc (token, c);
688 if (c == '%')
689 break;
691 while (1);
692 break;
693 case '!':
694 token->type = TOKEN_R_OPERATOR;
695 tokenPutc (token, c);
696 c = getcFromInputFile ();
697 if (c == '=')
698 tokenPutc (token, c);
699 else
700 ungetcToInputFile (c);
701 break;
702 case '{':
703 case '}':
704 case '(':
705 case ')':
706 case '[':
707 case ']':
708 case ',':
709 case '$':
710 case '@':
711 token->type = c;
712 tokenPutc (token, c);
713 break;
714 case '.':
715 tokenPutc (token, c);
716 c = getcFromInputFile ();
717 if (isdigit(c))
719 token->type = TOKEN_R_NUMBER;
720 tokenPutc (token, c);
721 readNumber(token, data);
723 else if (isalpha (c) || c == '_')
725 token->type = TOKEN_R_SYMBOL;
726 tokenPutc (token, c);
727 readSymbol (token, data);
729 token->keyword = resolveKeyword (token->string);
730 if (token->keyword != KEYWORD_NONE)
731 token->type = TOKEN_R_KEYWORD;
733 else if (c == '.')
735 token->type = TOKEN_R_DOTS;
736 tokenPutc (token, c);
738 c = getcFromInputFile ();
739 if (c == '.')
740 tokenPutc (token, c);
741 else if (isdigit(c))
743 token->type = TOKEN_R_DOTS_N;
746 tokenPutc (token, c);
747 c = getcFromInputFile ();
749 while (isdigit(c));
750 ungetcToInputFile (c);
752 else if (isalpha (c) || c == '_')
754 token->type = TOKEN_R_SYMBOL;
755 tokenPutc (token, c);
756 readSymbol (token, data);
758 token->keyword = resolveKeyword (token->string);
759 if (token->keyword != KEYWORD_NONE)
760 token->type = TOKEN_R_KEYWORD;
762 else
764 token->type = TOKEN_R_UNDEFINED;
765 ungetcToInputFile (c);
768 break;
769 default:
770 tokenPutc (token, c);
771 if (isdigit (c))
773 token->type = TOKEN_R_NUMBER;
774 readNumber(token, data);
776 else if (isalpha (c))
778 token->type = TOKEN_R_SYMBOL;
779 readSymbol (token, data);
781 token->keyword = resolveKeyword (token->string);
782 if (token->keyword != KEYWORD_NONE)
783 token->type = TOKEN_R_KEYWORD;
785 else
786 token->type = TOKEN_R_UNDEFINED;
787 break;
790 /* Handle parameters in a signature */
791 if (R(token)->signature && !tokenIsType(token, R_EOF) && !tokenIsTypeVal(token, '\n'))
793 vString *signature = R (token)->signature;
795 if (tokenIsTypeVal (token, '('))
796 R (token)->parenDepth++;
797 else if (tokenIsTypeVal (token, ')'))
798 R (token)->parenDepth--;
800 if (R (token)->kindIndexForParams != KIND_GHOST_INDEX
801 && R (token)->parenDepth == 1 && tokenIsType (token, R_SYMBOL)
802 && signatureExpectingParameter (signature))
803 makeSimpleRTag (token, R (token)->scopeIndex, false,
804 R (token)->kindIndexForParams, NULL);
806 if (vStringLast (signature) != '(' &&
807 !tokenIsTypeVal (token, ',') &&
808 !tokenIsTypeVal (token, ')'))
809 vStringPut (signature, ' ');
810 vStringCat (signature, token->string);
814 #define newRToken rNewToken
815 extern tokenInfo *rNewToken (void)
817 return newToken (&rTokenInfoClass);
820 #define tokenReadNoNewline rTokenReadNoNewline
821 extern void rTokenReadNoNewline (tokenInfo *const token)
823 while (1)
825 tokenRead(token);
826 if (!tokenIsTypeVal (token, '\n'))
827 break;
831 static void setupCollectingSignature (tokenInfo *const token,
832 vString *signature,
833 int kindIndexForParams,
834 int corkIndex)
836 R (token)->signature = signature;
837 R (token)->kindIndexForParams = kindIndexForParams;
838 R (token)->scopeIndex = corkIndex;
839 R (token)->parenDepth = 1;
842 extern void rSetupCollectingSignature (tokenInfo *const token,
843 vString *signature)
845 setupCollectingSignature (token, signature,
846 KIND_GHOST_INDEX, CORK_NIL);
849 static void teardownCollectingSignature (tokenInfo *const token)
851 R (token)->parenDepth = 0;
852 R (token)->scopeIndex = CORK_NIL;
853 R (token)->kindIndexForParams = KIND_GHOST_INDEX;
854 R (token)->signature = NULL;
857 extern void rTeardownCollectingSignature (tokenInfo *const token)
859 teardownCollectingSignature (token);
862 static int getKindForToken (tokenInfo *const token)
864 if (tokenIsKeyword (token, R_FUNCTION))
865 return K_FUNCTION;
866 else if (tokenIsKeyword (token, R_C))
867 return K_VECTOR;
868 else if (tokenIsKeyword (token, R_LIST))
869 return K_LIST;
870 else if (tokenIsKeyword (token, R_DATAFRAME))
871 return K_DATAFRAME;
872 return K_GLOBALVAR;
875 static bool findNonPlaceholder (int corkIndex, tagEntryInfo *entry, void *data)
877 bool *any_non_placehoders = data;
878 if (!entry->placeholder)
880 *any_non_placehoders = true;
881 return false;
883 return true;
886 static void parseRightSide (tokenInfo *const token, tokenInfo *const symbol, int parent)
888 R_TRACE_ENTER();
890 char *const assignment_operator = eStrdup (tokenString (token));
891 vString *signature = NULL;
893 tokenReadNoNewline (token);
895 int kind = getKindForToken (token);
897 /* Call sub parsers */
898 int corkIndex = notifyReadRightSideSymbol (symbol,
899 assignment_operator,
900 parent,
901 token);
902 if (corkIndex == CORK_NIL)
904 /* No subparser handle the symbol */
905 corkIndex = makeSimpleRTag (symbol, parent, kind == K_FUNCTION,
906 kind,
907 assignment_operator);
910 if (kind == K_FUNCTION)
912 /* parse signature */
913 tokenReadNoNewline (token);
914 if (tokenIsTypeVal (token, '('))
916 if (corkIndex == CORK_NIL)
917 tokenSkipOverPair (token);
918 else
920 signature = vStringNewInit("(");
921 setupCollectingSignature (token, signature, K_PARAM, corkIndex);
922 tokenSkipOverPair (token);
923 teardownCollectingSignature (token);
925 tokenReadNoNewline (token);
927 parent = (corkIndex == CORK_NIL
928 ? blackHoleIndex
929 : corkIndex);
931 else if (kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
933 tokenRead (token);
934 parsePair (token, corkIndex, NULL);
935 tokenRead (token);
936 parent = corkIndex;
939 R_TRACE_TOKEN_TEXT("body", token, parent);
941 parseStatement (token, parent, false, false);
943 tagEntryInfo *tag = getEntryInCorkQueue (corkIndex);
944 if (tag)
946 tag->extensionFields.endLine = token->lineNumber;
947 if (signature)
949 tag->extensionFields.signature = vStringDeleteUnwrap(signature);
950 signature = NULL;
952 /* If a vector has no named attribte and it has no lval,
953 * we don't make a tag for the vector. */
954 if ((kind == K_VECTOR || kind == K_LIST || kind == K_DATAFRAME)
955 && *assignment_operator == '\0')
957 bool any_non_placehoders = false;
958 foreachEntriesInScope (corkIndex, NULL,
959 findNonPlaceholder, &any_non_placehoders);
960 if (!any_non_placehoders)
961 tag->placeholder = 1;
965 vStringDelete (signature); /* NULL is acceptable. */
966 eFree (assignment_operator);
967 R_TRACE_LEAVE();
970 /* Parse arguments for library and source. */
971 static bool preParseExternalEntitiy (tokenInfo *const token, tokenInfo *const funcall)
973 TRACE_ENTER();
975 bool r = true;
976 tokenInfo *prefetch_token = newRToken ();
978 tokenReadNoNewline (prefetch_token);
979 if (tokenIsType (prefetch_token, R_SYMBOL)
980 || tokenIsType (prefetch_token, R_STRING))
982 tokenInfo *const loaded_obj_token = newTokenByCopying (prefetch_token);
983 tokenReadNoNewline (prefetch_token);
984 if (tokenIsTypeVal (prefetch_token, ')')
985 || tokenIsTypeVal (prefetch_token, ','))
987 if (tokenIsTypeVal (prefetch_token, ')'))
988 r = false;
990 makeSimpleRefTag (loaded_obj_token->string,
991 (tokenIsKeyword (funcall, R_LIBRARY)
992 ? K_LIBRARY
993 : K_SOURCE),
994 (tokenIsKeyword (funcall, R_LIBRARY)
995 ? (strcmp (tokenString(funcall), "library") == 0
996 ? R_LIBRARY_ATTACHED_BY_LIBRARY
997 : R_LIBRARY_ATTACHED_BY_REQUIRE)
998 : R_SOURCE_LOADED_BY_SOURCE));
999 tokenDelete (loaded_obj_token);
1001 else if (tokenIsEOF (prefetch_token))
1003 tokenCopy (token, prefetch_token);
1004 tokenDelete (loaded_obj_token);
1005 r = false;
1007 else
1009 tokenUnread (prefetch_token);
1010 tokenUnread (loaded_obj_token);
1011 tokenDelete (loaded_obj_token);
1014 else if (tokenIsEOF (prefetch_token))
1016 tokenCopy (token, prefetch_token);
1017 r = false;
1019 else
1020 tokenUnread (prefetch_token);
1022 tokenDelete (prefetch_token);
1024 TRACE_LEAVE_TEXT(r
1025 ? "unread tokens and request parsing again to the upper context"
1026 : "parse all arguments");
1027 return r;
1030 static bool preParseLoopCounter(tokenInfo *const token, int parent)
1032 bool r = true;
1033 TRACE_ENTER();
1035 tokenReadNoNewline (token);
1036 if (tokenIsType (token, R_SYMBOL))
1037 makeSimpleRTag (token, parent, false, K_GLOBALVAR, NULL);
1039 if (tokenIsEOF (token)
1040 || tokenIsTypeVal (token, ')'))
1041 r = false;
1043 TRACE_LEAVE_TEXT(r
1044 ? "unread tokens and request parsing again to the upper context"
1045 : "parse all arguments");
1046 return r;
1050 /* If funcall is non-NULL, this pair represents the argument list for the function
1051 * call for FUNCALL. */
1052 static void parsePair (tokenInfo *const token, int parent, tokenInfo *const funcall)
1054 R_TRACE_ENTER();
1056 bool in_continuous_pair = tokenIsTypeVal (token, '(')
1057 || tokenIsTypeVal (token, '[');
1058 bool is_funcall = funcall && tokenIsTypeVal (token, '(');
1059 bool done = false;
1061 if (is_funcall)
1063 if (tokenIsKeyword (funcall, R_LIBRARY) ||
1064 tokenIsKeyword (funcall, R_SOURCE))
1065 done = !preParseExternalEntitiy (token, funcall);
1066 else if (tokenIsKeyword (funcall, R_FOR))
1067 done = !preParseLoopCounter (token, parent);
1068 else if (notifyReadFuncall (funcall, token, parent) != CORK_NIL)
1069 done = true;
1072 if (done)
1074 R_TRACE_LEAVE();
1075 return;
1080 tokenRead (token);
1081 R_TRACE_TOKEN_TEXT("inside pair", token, parent);
1082 parseStatement (token, parent, (funcall != NULL), in_continuous_pair);
1084 while (! (tokenIsEOF (token)
1085 || tokenIsTypeVal (token, ')')
1086 || tokenIsTypeVal (token, '}')
1087 || tokenIsTypeVal (token, ']')));
1088 R_TRACE_LEAVE();
1091 static bool isAtConstructorInvocation (void)
1093 bool r = false;
1095 tokenInfo *const token = newRToken ();
1096 tokenRead (token);
1097 if (tokenIsTypeVal (token, '('))
1098 r = true;
1099 tokenUnread (token);
1100 tokenDelete (token);
1101 return r;
1104 static bool parseStatement (tokenInfo *const token, int parent,
1105 bool in_arglist, bool in_continuous_pair)
1107 R_TRACE_ENTER();
1108 int last_count = rTokenInfoClass.read_counter;
1112 if (tokenIsEOF (token))
1113 break;
1114 else if (tokenIsTypeVal (token, ';'))
1116 R_TRACE_TOKEN_TEXT ("break with ;", token, parent);
1117 break;
1119 else if (tokenIsTypeVal (token, '\n'))
1121 R_TRACE_TOKEN_TEXT ("break with \\n", token, parent);
1122 break;
1124 else if ((tokenIsKeyword (token, R_FUNCTION)
1125 || ((tokenIsKeyword (token, R_C)
1126 || tokenIsKeyword (token, R_LIST)
1127 || tokenIsKeyword (token, R_DATAFRAME))
1128 && isAtConstructorInvocation ())))
1130 /* This statement doesn't start with a symbol.
1131 * This function is not assigned to any symbol. */
1132 tokenInfo *const anonfunc = newTokenByCopying (token);
1133 int kind = getKindForToken (token);
1134 anonGenerate (anonfunc->string,
1135 kindExtraInfo [kind].anon_prefix, kind);
1136 tokenUnread (token);
1137 vStringClear (token->string);
1138 parseRightSide (token, anonfunc, parent);
1139 tokenDelete (anonfunc);
1141 else if (tokenIsType (token, R_SYMBOL)
1142 || tokenIsType (token, R_STRING)
1143 || tokenIsType (token, R_KEYWORD))
1145 tokenInfo *const symbol = newTokenByCopying (token);
1147 if (in_continuous_pair)
1148 tokenReadNoNewline (token);
1149 else
1150 tokenRead (token);
1152 if (tokenIsType (token, R_LASSIGN))
1154 /* Assignment */
1155 parseRightSide (token, symbol, parent);
1156 R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
1157 tokenDelete(symbol);
1158 break;
1160 else if (tokenIsTypeVal (token, '='))
1162 /* Assignment */
1163 if (in_arglist)
1165 /* Ignore the left side symbol. */
1166 tokenRead (token);
1167 R_TRACE_TOKEN_TEXT("(in arg list) after = body", token, parent);
1169 else
1171 parseRightSide (token, symbol, parent);
1172 R_TRACE_TOKEN_TEXT ("break with right side", token, parent);
1173 tokenDelete(symbol);
1174 break;
1177 else if (tokenIsTypeVal (token, '('))
1179 /* function call */
1180 parsePair (token, parent, symbol);
1181 tokenRead (token);
1182 R_TRACE_TOKEN_TEXT("after arglist", token, parent);
1184 else if (tokenIsTypeVal (token, '$')
1185 || tokenIsTypeVal (token, '@')
1186 || tokenIsType (token, R_SCOPE))
1188 tokenReadNoNewline (token); /* Skip the next identifier */
1189 tokenRead (token);
1190 R_TRACE_TOKEN_TEXT("after $", token, parent);
1192 else
1193 R_TRACE_TOKEN_TEXT("else after symbol", token, parent);
1194 tokenDelete(symbol);
1196 else if (tokenIsType (token, R_RASSIGN))
1198 char *const assignment_operator = eStrdup (tokenString (token));
1199 tokenReadNoNewline (token);
1200 if (tokenIsType (token, R_SYMBOL)
1201 || tokenIsType (token, R_STRING))
1203 makeSimpleRTag (token, parent, false,
1204 K_GLOBALVAR, assignment_operator);
1205 tokenRead (token);
1207 eFree (assignment_operator);
1208 R_TRACE_TOKEN_TEXT("after ->", token, parent);
1210 else if (tokenIsType (token, R_OPERATOR))
1212 tokenReadNoNewline (token);
1213 R_TRACE_TOKEN_TEXT("after operator", token, parent);
1215 else if (tokenIsTypeVal (token, '(')
1216 || tokenIsTypeVal (token, '{')
1217 || tokenIsTypeVal (token, '['))
1219 parsePair (token, parent, NULL);
1220 tokenRead (token);
1221 R_TRACE_TOKEN_TEXT("after pair", token, parent);
1223 else if (tokenIsTypeVal (token, ')')
1224 || tokenIsTypeVal (token, '}')
1225 || tokenIsTypeVal (token, ']'))
1227 R_TRACE_TOKEN_TEXT ("break with close", token, parent);
1228 break;
1230 else if (tokenIsTypeVal (token, '$')
1231 || tokenIsTypeVal (token, '@')
1232 || tokenIsType (token, R_SCOPE))
1234 tokenReadNoNewline (token); /* Skip the next identifier */
1235 tokenRead (token);
1236 R_TRACE_TOKEN_TEXT("after $", token, parent);
1238 else
1240 tokenRead (token);
1241 R_TRACE_TOKEN_TEXT("else", token, parent);
1244 while (!tokenIsEOF (token));
1246 R_TRACE_LEAVE();
1248 return (last_count != rTokenInfoClass.read_counter);
1251 extern bool rParseStatement (tokenInfo *const token, int parentIndex, bool in_arglist)
1253 pushLanguage (Lang_R);
1254 bool r = parseStatement (token, parentIndex, in_arglist, true);
1255 popLanguage ();
1256 return r;
1259 static int notifyReadRightSideSymbol (tokenInfo *const symbol,
1260 const char *const assignmentOperator,
1261 int parent,
1262 tokenInfo *const token)
1264 subparser *sub;
1265 int q = CORK_NIL;
1267 foreachSubparser (sub, false)
1269 rSubparser *rsub = (rSubparser *)sub;
1270 if (rsub->readRightSideSymbol)
1272 enterSubparser (sub);
1273 q = rsub->readRightSideSymbol (rsub, symbol, assignmentOperator, parent, token);
1274 leaveSubparser ();
1275 if (q != CORK_NIL)
1276 break;
1280 return q;
1283 static int makeSimpleSubparserTag (int langType,
1284 tokenInfo *const token, int parent,
1285 bool in_func, int kindInR,
1286 const char *assignmentOperator)
1288 int q = CORK_NIL;
1289 subparser *sub = getLanguageSubparser (langType, false);
1290 if (sub)
1292 rSubparser *rsub = (rSubparser *)sub;
1293 if (rsub->makeTagWithTranslation)
1295 enterSubparser (sub);
1296 q = rsub->makeTagWithTranslation (rsub,
1297 token, parent,
1298 in_func, kindInR,
1299 assignmentOperator);
1300 leaveSubparser ();
1303 return q;
1306 static bool askSubparserTagAcceptancy (tagEntryInfo *pe)
1308 bool q = false;
1309 subparser *sub = getLanguageSubparser (pe->langType, false);
1311 rSubparser *rsub = (rSubparser *)sub;
1312 if (rsub->askTagAcceptancy)
1314 enterSubparser (sub);
1315 q = rsub->askTagAcceptancy (rsub, pe);
1316 leaveSubparser ();
1319 return q;
1322 static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo *e)
1324 bool q = false;
1325 pushLanguage (Lang_R);
1326 subparser *sub = getLanguageSubparser (e->langType, false);
1327 Assert (sub);
1328 popLanguage ();
1329 rSubparser *rsub = (rSubparser *)sub;
1330 if (rsub->hasFunctionAlikeKind)
1332 enterSubparser (sub);
1333 q = rsub->hasFunctionAlikeKind (rsub, e);
1334 leaveSubparser ();
1336 return q;
1339 static int notifyReadFuncall (tokenInfo *const func,
1340 tokenInfo *const token,
1341 int parent)
1343 int q = CORK_NIL;
1344 subparser *sub;
1345 foreachSubparser (sub, false)
1347 rSubparser *rsub = (rSubparser *)sub;
1348 if (rsub->readFuncall)
1350 enterSubparser (sub);
1351 q = rsub->readFuncall (rsub, func, token, parent);
1352 leaveSubparser ();
1353 if (q != CORK_NIL)
1354 break;
1357 return q;
1360 static void findRTags (void)
1362 tokenInfo *const token = newRToken ();
1364 blackHoleIndex = makePlaceholder ("**BLACK-HOLE/DON'T TAG ME**");
1365 registerEntry (blackHoleIndex);
1367 TRACE_PRINT ("install blackhole: %d", blackHoleIndex);
1371 tokenRead(token);
1372 R_TRACE_TOKEN(token, CORK_NIL);
1373 parseStatement (token, CORK_NIL, false, false);
1375 while (!tokenIsEOF (token));
1377 TRACE_PRINT ("run blackhole", blackHoleIndex);
1378 markAllEntriesInScopeAsPlaceholder (blackHoleIndex);
1380 tokenDelete (token);
1383 static void initializeRParser (const langType language)
1385 Lang_R = language;
1388 extern parserDefinition *RParser (void)
1390 static const char *const extensions[] = { "r", "R", "s", "q", NULL };
1391 parserDefinition *const def = parserNew ("R");
1392 static selectLanguage selectors[] = { selectByArrowOfR,
1393 NULL };
1395 def->extensions = extensions;
1396 def->kindTable = RKinds;
1397 def->kindCount = ARRAY_SIZE(RKinds);
1398 def->fieldTable = RFields;
1399 def->fieldCount = ARRAY_SIZE (RFields);
1400 def->keywordTable = RKeywordTable;
1401 def->keywordCount = ARRAY_SIZE(RKeywordTable);
1402 def->useCork = CORK_QUEUE | CORK_SYMTAB;
1403 def->parser = findRTags;
1404 def->selectLanguage = selectors;
1405 def->initialize = initializeRParser;
1407 return def;
1410 extern vString *rExtractNameFromString (vString* str)
1412 int offset = 0;
1414 if (vStringLength (str) == 0)
1415 return NULL;
1417 char b = vStringChar (str, 0);
1418 if (b == '\'' || b == '"' || b == '`')
1419 offset = 1;
1421 if (offset && vStringLength (str) < 3)
1422 return NULL;
1424 vString *n = vStringNewInit (vStringValue (str) + offset);
1425 if (vStringChar (n, vStringLength (n) - 1) == b)
1426 vStringChop (n);
1428 return n;
1431 #ifdef DEBUG
1432 static const char *tokenTypeStr(enum RTokenType e)
1433 { /* Generated by misc/enumstr.sh with cmdline:
1434 parsers/r.c RTokenType tokenTypeStr TOKEN_R_ --use-lower-bits-as-is */
1435 switch (e)
1437 case TOKEN_R_EOF: return "EOF";
1438 case TOKEN_R_UNDEFINED: return "UNDEFINED";
1439 case TOKEN_R_KEYWORD: return "KEYWORD";
1440 case TOKEN_R_NEWLINE: return "NEWLINE";
1441 case TOKEN_R_NUMBER: return "NUMBER";
1442 case TOKEN_R_SYMBOL: return "SYMBOL";
1443 case TOKEN_R_STRING: return "STRING";
1444 case TOKEN_R_OPERATOR: return "OPERATOR";
1445 case TOKEN_R_DOTS: return "DOTS";
1446 case TOKEN_R_DOTS_N: return "DOTS_N";
1447 case TOKEN_R_LASSIGN: return "LASSIGN";
1448 case TOKEN_R_RASSIGN: return "RASSIGN";
1449 case TOKEN_R_SCOPE: return "SCOPE";
1450 default: break;
1452 static char buf[3];
1453 if (isprint (e))
1455 buf[0] = e;
1456 buf[1] = '\0';
1458 else if (e == '\n')
1460 buf[0] = '\\';
1461 buf[1] = 'n';
1462 buf[2] = '\0';
1464 else
1466 buf[0] = '\0';
1468 return buf;
1470 #endif