2 * Copyright (c) 2003-2004, Ascher Stefan <stievie@utanet.at>
3 * Copyright (c) 2020, Masatake YAMATO <yamato@redhat.com>
4 * Copyright (c) 2020, Red Hat, Inc.
6 * This source code is released for free distribution under the terms of the
7 * GNU General Public License version 2 or (at your option) any later version.
9 * This module contains functions for generating tags for R language files.
10 * R is a programming language for statistical computing.
11 * R is GPL Software, get it from http://www.r-project.org/
13 * The language references are available at
14 * https://cran.r-project.org/manuals.html, and
15 * https://cran.r-project.org/doc/manuals/r-release/R-lang.html
17 * The base library (including library and source functions) release is at
18 * https://stat.ethz.ch/R-manual/R-devel/library/base/html/00Index.html
24 #include "general.h" /* must always come first */
31 #include "selectors.h"
32 #include "tokeninfo.h"
35 #include "subparser.h"
39 #include <ctype.h> /* to define isalpha(), isalnum(), isspace() */
46 #define R_TRACE_TOKEN_TEXT(TXT,T,Q) TRACE_PRINT("<%s> token: %s (%s), parent: %s", \
48 tokenIsTypeVal(T, '\n')? "\\n": tokenString(T), \
49 tokenTypeStr(T->type), \
50 (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
51 #define R_TRACE_TOKEN(T,Q) TRACE_PRINT("token: %s (%s), parent: %s", \
52 tokenIsTypeVal((T), '\n')? "\\n": tokenString(T), \
53 tokenTypeStr((T)->type), \
54 (Q) == CORK_NIL? "": getEntryInCorkQueue(Q)->name)
56 #define R_TRACE_ENTER() TRACE_ENTER_TEXT("token: %s (%s), parent: %s", \
57 tokenIsTypeVal(token, '\n')? "\\n": tokenString(token), \
58 tokenTypeStr(token->type), \
59 parent == CORK_NIL? "": getEntryInCorkQueue(parent)->name)
60 #define R_TRACE_LEAVE() TRACE_LEAVE()
62 #define R_TRACE_TOKEN_TEXT(TXT,T,Q) do {} while (0);
63 #define R_TRACE_TOKEN(T,Q) do {} while (0);
64 #define R_TRACE_ENTER() do {} while (0);
65 #define R_TRACE_LEAVE() do {} while (0);
88 R_LIBRARY_ATTACHED_BY_LIBRARY
,
89 R_LIBRARY_ATTACHED_BY_REQUIRE
,
93 R_SOURCE_LOADED_BY_SOURCE
,
96 static roleDefinition RLibraryRoles
[] = {
97 { true, "library", "library attached by library function" },
98 { true, "require", "library attached by require function" },
101 static roleDefinition RSourceRoles
[] = {
102 { true, "source", "source loaded by source fucntion" },
105 static kindDefinition RKinds
[KIND_COUNT
] = {
106 {true, 'f', "function", "functions"},
107 {true, 'l', "library", "libraries",
108 .referenceOnly
= true, ATTACH_ROLES (RLibraryRoles
) },
109 {true, 's', "source", "sources",
110 .referenceOnly
= true, ATTACH_ROLES (RSourceRoles
) },
111 {true, 'g', "globalVar", "global variables having values other than function()"},
112 {true, 'v', "functionVar", "function variables having values other than function()"},
113 {false,'z', "parameter", "function parameters inside function definitions" },
114 {true, 'c', "vector", "vectors explicitly created with `c()'" },
115 {true, 'L', "list", "lists explicitly created with `list()'" },
116 {true, 'd', "dataframe", "data frame explicitly created with `data.frame()'" },
117 {true, 'n', "nameattr", "names attribtes in vectors, lists, or dataframes" },
120 struct sKindExtraInfo
{
121 const char *anon_prefix
;
125 static struct sKindExtraInfo kindExtraInfo
[KIND_COUNT
] = {
145 F_ASSIGNMENT_OPERATOR
,
149 static fieldDefinition RFields
[] = {
151 .name
= "assignmentop",
152 .description
= "operator for assignment",
156 .name
= "constructor",
157 .description
= "function used for making value assigned to the nameattr tag",
162 typedef int keywordId
; /* to allow KEYWORD_NONE */
164 static const keywordTable RKeywordTable
[] = {
165 { "c", KEYWORD_R_C
},
166 { "list", KEYWORD_R_LIST
},
167 { "data.frame",KEYWORD_R_DATAFRAME
},
168 { "function", KEYWORD_R_FUNCTION
},
169 { "if", KEYWORD_R_IF
},
170 { "else", KEYWORD_R_ELSE
},
171 { "for", KEYWORD_R_FOR
},
172 { "while", KEYWORD_R_WHILE
},
173 { "repeat", KEYWORD_R_REPEAT
},
174 { "in", KEYWORD_R_IN
},
175 { "next", KEYWORD_R_NEXT
},
176 { "break", KEYWORD_R_BREAK
},
177 { "TRUE", KEYWORD_R_TRUE
, },
178 { "FALSE", KEYWORD_R_FALSE
, },
179 { "NULL", KEYWORD_R_NULL
, },
180 { "Inf", KEYWORD_R_INF
, },
181 { "NaN", KEYWORD_R_NAN
, },
182 { "NA", KEYWORD_R_NA
, },
183 { "NA_integer_", KEYWORD_R_NA
, },
184 { "NA_real_", KEYWORD_R_NA
, },
185 { "NA_complex_", KEYWORD_R_NA
, },
186 { "NA_character_", KEYWORD_R_NA
, },
187 { "source", KEYWORD_R_SOURCE
},
188 { "library", KEYWORD_R_LIBRARY
},
189 { "require", KEYWORD_R_LIBRARY
},
193 static const char *tokenTypeStr(enum RTokenType e
);
196 static struct tokenTypePair typePairs
[] = {
202 typedef struct sRToken
{
207 int kindIndexForParams
; /* Used only when gathering parameters */
210 #define R(TOKEN) ((rToken *)TOKEN)
212 static int blackHoleIndex
;
214 static langType Lang_R
;
216 static void readToken (tokenInfo
*const token
, void *data
);
217 static void clearToken (tokenInfo
*token
);
218 static struct tokenInfoClass rTokenInfoClass
= {
220 .typeForUndefined
= TOKEN_R_UNDEFINED
,
221 .keywordNone
= KEYWORD_NONE
,
222 .typeForKeyword
= TOKEN_R_KEYWORD
,
223 .typeForEOF
= TOKEN_R_EOF
,
224 .extraSpace
= sizeof (rToken
) - sizeof (tokenInfo
),
226 .pairCount
= ARRAY_SIZE (typePairs
),
235 * FUNCTION PROTOTYPES
237 static bool parseStatement (tokenInfo
*const token
, int parent
, bool in_arglist
, bool in_continuous_pair
);
238 static void parsePair (tokenInfo
*const token
, int parent
, tokenInfo
*const funcall
);
240 static int notifyReadRightSideSymbol (tokenInfo
*const symbol
,
241 const char *const assignmentOperator
,
243 tokenInfo
*const token
);
244 static int makeSimpleSubparserTag (int langType
, tokenInfo
*const token
, int parent
,
245 bool in_func
, int kindInR
, const char *assignmentOperator
);
246 static bool askSubparserTagAcceptancy (tagEntryInfo
*pe
);
247 static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo
*e
);
248 static int notifyReadFuncall (tokenInfo
*const func
, tokenInfo
*const token
, int parent
);
251 * FUNCTION DEFINITIONS
253 static bool hasKindsOrCtors (tagEntryInfo
* e
, int kinds
[], size_t count
)
255 if (e
->langType
== Lang_R
)
257 for (size_t i
= 0; i
< count
; i
++)
259 if (e
->kindIndex
== kinds
[i
])
265 bool function
= false;
266 for (size_t i
= 0; i
< count
; i
++)
268 if (K_FUNCTION
== kinds
[i
])
274 if (function
&& askSubparserTagHasFunctionAlikeKind (e
))
278 const char *tmp
= getParserFieldValueForType (e
,
279 RFields
[F_CONSTRUCTOR
].ftype
);
283 for (size_t i
= 0; i
< count
; i
++)
285 const char * ctor
= kindExtraInfo
[kinds
[i
]].ctor
;
286 if (ctor
&& strcmp (tmp
, ctor
) == 0)
293 static int searchScopeOtherThan (int scope
, int kinds
[], size_t count
)
297 tagEntryInfo
* e
= getEntryInCorkQueue (scope
);
301 if (!hasKindsOrCtors (e
, kinds
, count
))
304 scope
= e
->extensionFields
.scopeIndex
;
309 static int makeSimpleRTagR (tokenInfo
*const token
, int parent
, int kind
,
310 const char * assignmentOp
)
312 if (assignmentOp
&& (strlen (assignmentOp
) == 3))
314 /* <<- or ->> is used here. */
315 if (anyKindsEntryInScopeRecursive (parent
, tokenString (token
),
319 K_PARAM
}, 4) != CORK_NIL
)
325 /* If the tag (T) to be created is defined in a scope and
326 the scope already has another tag having the same name
327 as T, T should not be created. */
328 tagEntryInfo
*pe
= getEntryInCorkQueue (parent
);
329 int cousin
= CORK_NIL
;
330 if (pe
&& ((pe
->langType
== Lang_R
&& pe
->kindIndex
== K_FUNCTION
)
331 || (pe
->langType
!= Lang_R
&& askSubparserTagHasFunctionAlikeKind (pe
))))
333 cousin
= anyEntryInScope (parent
, tokenString (token
));
334 if (kind
== K_GLOBALVAR
)
337 else if (pe
&& (kind
== K_GLOBALVAR
)
338 && hasKindsOrCtors (pe
, (int[]){K_VECTOR
, K_LIST
, K_DATAFRAME
}, 3))
340 parent
= searchScopeOtherThan (pe
->extensionFields
.scopeIndex
,
341 (int[]){K_VECTOR
, K_LIST
, K_DATAFRAME
}, 3);
342 if (parent
== CORK_NIL
)
343 cousin
= anyKindEntryInScope (parent
, tokenString (token
), K_GLOBALVAR
);
346 cousin
= anyKindEntryInScope (parent
, tokenString (token
), K_FUNCVAR
);
352 /* The condition for tagging is a bit relaxed here.
353 Even if the same name tag is created in the scope, a name
354 is tagged if kinds are different. */
355 cousin
= anyKindEntryInScope (parent
, tokenString (token
), kind
);
357 if (cousin
!= CORK_NIL
)
360 int corkIndex
= makeSimpleTag (token
->string
, kind
);
361 tagEntryInfo
*tag
= getEntryInCorkQueue (corkIndex
);
364 tag
->extensionFields
.scopeIndex
= parent
;
367 if (strlen (assignmentOp
) > 0)
368 attachParserField (tag
, true,
369 RFields
[F_ASSIGNMENT_OPERATOR
].ftype
,
372 markTagExtraBit (tag
, XTAG_ANONYMOUS
);
374 registerEntry (corkIndex
);
379 static int makeSimpleRTag (tokenInfo
*const token
, int parent
, bool in_func
, int kind
,
380 const char * assignmentOp
)
383 const char *ctor
= kindExtraInfo
[kind
].ctor
;
384 tagEntryInfo
*pe
= (parent
== CORK_NIL
)? NULL
: getEntryInCorkQueue (parent
);
386 /* The makeTagWithTranslation method of a subparser,
387 called from makeSimpleSubparserTag, expects
388 the kind to be already resolved. */
389 if (pe
&& hasKindsOrCtors (pe
, (int[]){K_VECTOR
, K_LIST
, K_DATAFRAME
}, 3))
392 && strcmp (assignmentOp
, "=") == 0)
396 bool foreign_tag
= false;
397 if (pe
== NULL
|| pe
->langType
== Lang_R
||
398 !askSubparserTagAcceptancy (pe
))
399 r
= makeSimpleRTagR (token
, parent
, kind
, assignmentOp
);
403 r
= makeSimpleSubparserTag (pe
->langType
, token
, parent
, in_func
,
407 if ((kind
== K_NAMEATTR
|| foreign_tag
) && ctor
)
409 tagEntryInfo
*e
= getEntryInCorkQueue (r
);
411 attachParserField (e
, true,
412 RFields
[F_CONSTRUCTOR
].ftype
,
/* Reset the R-specific fields of TOKEN (accessed through the R() cast):
 * parenDepth goes back to 0, scopeIndex to CORK_NIL and
 * kindIndexForParams to KIND_GHOST_INDEX.  If a signature vString is
 * still attached, it is passed to vStringDelete() and the pointer is
 * cleared so it cannot be used afterwards. */
419 static void clearToken (tokenInfo
*token
)
421 R (token
)->parenDepth
= 0;
422 R (token
)->scopeIndex
= CORK_NIL
;
423 R (token
)->kindIndexForParams
= KIND_GHOST_INDEX
;
424 if (R (token
)->signature
)
426 vStringDelete (R (token
)->signature
);
427 R (token
)->signature
= NULL
;
431 static void readString (tokenInfo
*const token
, void *data
)
434 bool escaped
= false;
436 int c0
= tokenString(token
)[0];
440 c
= getcFromInputFile ();
448 tokenPutc (token
, c
);
449 if (!escaped
&& c
== c0
)
454 tokenPutc (token
, c
);
458 tokenPutc (token
, c
);
465 static void readNumber (tokenInfo
*const token
, void *data
)
471 * Valid numeric constants: 1 10 0.1 .2 1e-7 1.2e+7
472 * Valid integer constants: 1L, 0x10L, 1000000L, 1e6L
473 * Valid numeric constants: 1.1L, 1e-3L, 0x1.1p-2
474 * Valid complex constants: 2i 4.1i 1e-2i
476 while ((c
= getcFromInputFile ()))
478 if (isxdigit (c
) || c
== '.' || c
== 'E'
479 || c
== '+' || c
== '-'
480 || c
== 'L' || c
== 'x' || c
== 'p'
482 tokenPutc (token
, c
);
485 ungetcToInputFile (c
);
491 static void readSymbol (tokenInfo
*const token
, void *data
)
494 while ((c
= getcFromInputFile ()))
496 if (isalnum (c
) || c
== '.' || c
== '_')
497 tokenPutc (token
, c
);
500 ungetcToInputFile (c
);
/* Map the text held in STRING to a keyword id by calling
 * lookupCaseKeyword() with the input language.
 *
 * The language is obtained from getInputLanguage() only on the first
 * call and then kept in a function-local static, so later calls reuse
 * the cached value.
 * NOTE(review): presumably the result is KEYWORD_NONE when the text is
 * not a keyword (callers compare against it) -- confirm against
 * lookupCaseKeyword(). */
506 static keywordId
resolveKeyword (vString
*string
)
508 char *s
= vStringValue (string
);
509 static langType lang
= LANG_AUTO
;
511 if (lang
== LANG_AUTO
)
512 lang
= getInputLanguage ();
514 return lookupCaseKeyword (s
, lang
);
517 static bool signatureExpectingParameter (vString
*signature
)
519 if (vStringLast (signature
) == '(')
522 for (size_t i
= vStringLength (signature
); i
> 0; i
--)
524 char c
= vStringChar (signature
, i
- 1);
534 static void readToken (tokenInfo
*const token
, void *data
)
538 token
->type
= TOKEN_R_UNDEFINED
;
539 token
->keyword
= KEYWORD_NONE
;
540 vStringClear (token
->string
);
543 c
= getcFromInputFile ();
544 while (c
== ' ' || c
== '\t' || c
== '\f');
546 token
->lineNumber
= getInputLineNumber ();
547 token
->filePosition
= getInputFilePosition ();
552 token
->type
= TOKEN_R_EOF
;
557 c
= getcFromInputFile ();
560 token
->type
= TOKEN_R_EOF
;
566 tokenPutc (token
, c
);
574 tokenPutc (token
, c
);
579 token
->type
= TOKEN_R_STRING
;
580 tokenPutc (token
, c
);
581 readString (token
, data
);
587 token
->type
= TOKEN_R_OPERATOR
;
588 tokenPutc (token
, c
);
591 token
->type
= TOKEN_R_OPERATOR
;
592 tokenPutc (token
, c
);
593 c
= getcFromInputFile ();
596 tokenPutc (token
, c
);
597 token
->type
= TOKEN_R_SCOPE
;
598 c
= getcFromInputFile ();
600 tokenPutc (token
, c
);
602 ungetcToInputFile (c
);
605 ungetcToInputFile (c
);
610 token
->type
= TOKEN_R_OPERATOR
;
611 tokenPutc (token
, c
);
612 c0
= getcFromInputFile ();
614 tokenPutc (token
, c0
);
616 ungetcToInputFile (c0
);
619 token
->type
= TOKEN_R_OPERATOR
;
620 tokenPutc (token
, c
);
621 c
= getcFromInputFile ();
623 tokenPutc (token
, c
);
627 ungetcToInputFile (c
);
631 token
->type
= TOKEN_R_OPERATOR
;
632 tokenPutc (token
, c
);
633 c
= getcFromInputFile ();
636 token
->type
= TOKEN_R_RASSIGN
;
637 tokenPutc (token
, c
);
638 c
= getcFromInputFile ();
640 tokenPutc (token
, c
);
642 ungetcToInputFile (c
);
645 ungetcToInputFile (c
);
648 token
->type
= TOKEN_R_OPERATOR
;
649 tokenPutc (token
, c
);
650 c
= getcFromInputFile ();
652 tokenPutc (token
, c
);
654 ungetcToInputFile (c
);
657 token
->type
= TOKEN_R_OPERATOR
;
658 tokenPutc (token
, c
);
659 c
= getcFromInputFile ();
664 tokenPutc (token
, c
);
665 c
= getcFromInputFile ();
670 token
->type
= TOKEN_R_LASSIGN
;
671 tokenPutc (token
, c
);
674 tokenPutc (token
, c
);
676 ungetcToInputFile (c
);
679 token
->type
= TOKEN_R_OPERATOR
;
680 tokenPutc (token
, c
);
683 c
= getcFromInputFile ();
687 tokenPutc (token
, c
);
694 token
->type
= TOKEN_R_OPERATOR
;
695 tokenPutc (token
, c
);
696 c
= getcFromInputFile ();
698 tokenPutc (token
, c
);
700 ungetcToInputFile (c
);
712 tokenPutc (token
, c
);
715 tokenPutc (token
, c
);
716 c
= getcFromInputFile ();
719 token
->type
= TOKEN_R_NUMBER
;
720 tokenPutc (token
, c
);
721 readNumber(token
, data
);
723 else if (isalpha (c
) || c
== '_')
725 token
->type
= TOKEN_R_SYMBOL
;
726 tokenPutc (token
, c
);
727 readSymbol (token
, data
);
729 token
->keyword
= resolveKeyword (token
->string
);
730 if (token
->keyword
!= KEYWORD_NONE
)
731 token
->type
= TOKEN_R_KEYWORD
;
735 token
->type
= TOKEN_R_DOTS
;
736 tokenPutc (token
, c
);
738 c
= getcFromInputFile ();
740 tokenPutc (token
, c
);
743 token
->type
= TOKEN_R_DOTS_N
;
746 tokenPutc (token
, c
);
747 c
= getcFromInputFile ();
750 ungetcToInputFile (c
);
752 else if (isalpha (c
) || c
== '_')
754 token
->type
= TOKEN_R_SYMBOL
;
755 tokenPutc (token
, c
);
756 readSymbol (token
, data
);
758 token
->keyword
= resolveKeyword (token
->string
);
759 if (token
->keyword
!= KEYWORD_NONE
)
760 token
->type
= TOKEN_R_KEYWORD
;
764 token
->type
= TOKEN_R_UNDEFINED
;
765 ungetcToInputFile (c
);
770 tokenPutc (token
, c
);
773 token
->type
= TOKEN_R_NUMBER
;
774 readNumber(token
, data
);
776 else if (isalpha (c
))
778 token
->type
= TOKEN_R_SYMBOL
;
779 readSymbol (token
, data
);
781 token
->keyword
= resolveKeyword (token
->string
);
782 if (token
->keyword
!= KEYWORD_NONE
)
783 token
->type
= TOKEN_R_KEYWORD
;
786 token
->type
= TOKEN_R_UNDEFINED
;
790 /* Handle parameters in a signature */
791 if (R(token
)->signature
&& !tokenIsType(token
, R_EOF
) && !tokenIsTypeVal(token
, '\n'))
793 vString
*signature
= R (token
)->signature
;
795 if (tokenIsTypeVal (token
, '('))
796 R (token
)->parenDepth
++;
797 else if (tokenIsTypeVal (token
, ')'))
798 R (token
)->parenDepth
--;
800 if (R (token
)->kindIndexForParams
!= KIND_GHOST_INDEX
801 && R (token
)->parenDepth
== 1 && tokenIsType (token
, R_SYMBOL
)
802 && signatureExpectingParameter (signature
))
803 makeSimpleRTag (token
, R (token
)->scopeIndex
, false,
804 R (token
)->kindIndexForParams
, NULL
);
806 if (vStringLast (signature
) != '(' &&
807 !tokenIsTypeVal (token
, ',') &&
808 !tokenIsTypeVal (token
, ')'))
809 vStringPut (signature
, ' ');
810 vStringCat (signature
, token
->string
);
/* Create a token by calling newToken() with rTokenInfoClass.
 * Exported as rNewToken; newRToken is the alias used inside this
 * file. */
814 #define newRToken rNewToken
815 extern tokenInfo
*rNewToken (void)
817 return newToken (&rTokenInfoClass
);
820 #define tokenReadNoNewline rTokenReadNoNewline
821 extern void rTokenReadNoNewline (tokenInfo
*const token
)
826 if (!tokenIsTypeVal (token
, '\n'))
831 static void setupCollectingSignature (tokenInfo
*const token
,
833 int kindIndexForParams
,
836 R (token
)->signature
= signature
;
837 R (token
)->kindIndexForParams
= kindIndexForParams
;
838 R (token
)->scopeIndex
= corkIndex
;
839 R (token
)->parenDepth
= 1;
842 extern void rSetupCollectingSignature (tokenInfo
*const token
,
845 setupCollectingSignature (token
, signature
,
846 KIND_GHOST_INDEX
, CORK_NIL
);
/* Stop collecting a signature through TOKEN: parenDepth is reset to 0,
 * scopeIndex to CORK_NIL and kindIndexForParams to KIND_GHOST_INDEX.
 *
 * The signature pointer is only set to NULL here; the vString itself is
 * not deleted by this function.
 * NOTE(review): the caller that supplied the vString appears to remain
 * responsible for releasing it -- confirm at the call sites. */
849 static void teardownCollectingSignature (tokenInfo
*const token
)
851 R (token
)->parenDepth
= 0;
852 R (token
)->scopeIndex
= CORK_NIL
;
853 R (token
)->kindIndexForParams
= KIND_GHOST_INDEX
;
854 R (token
)->signature
= NULL
;
/* Exported wrapper: forwards TOKEN to the file-local
 * teardownCollectingSignature(). */
857 extern void rTeardownCollectingSignature (tokenInfo
*const token
)
859 teardownCollectingSignature (token
);
862 static int getKindForToken (tokenInfo
*const token
)
864 if (tokenIsKeyword (token
, R_FUNCTION
))
866 else if (tokenIsKeyword (token
, R_C
))
868 else if (tokenIsKeyword (token
, R_LIST
))
870 else if (tokenIsKeyword (token
, R_DATAFRAME
))
875 static bool findNonPlaceholder (int corkIndex
, tagEntryInfo
*entry
, void *data
)
877 bool *any_non_placehoders
= data
;
878 if (!entry
->placeholder
)
880 *any_non_placehoders
= true;
886 static void parseRightSide (tokenInfo
*const token
, tokenInfo
*const symbol
, int parent
)
890 char *const assignment_operator
= eStrdup (tokenString (token
));
891 vString
*signature
= NULL
;
893 tokenReadNoNewline (token
);
895 int kind
= getKindForToken (token
);
897 /* Call sub parsers */
898 int corkIndex
= notifyReadRightSideSymbol (symbol
,
902 if (corkIndex
== CORK_NIL
)
904 /* No subparser handled the symbol */
905 corkIndex
= makeSimpleRTag (symbol
, parent
, kind
== K_FUNCTION
,
907 assignment_operator
);
910 if (kind
== K_FUNCTION
)
912 /* parse signature */
913 tokenReadNoNewline (token
);
914 if (tokenIsTypeVal (token
, '('))
916 if (corkIndex
== CORK_NIL
)
917 tokenSkipOverPair (token
);
920 signature
= vStringNewInit("(");
921 setupCollectingSignature (token
, signature
, K_PARAM
, corkIndex
);
922 tokenSkipOverPair (token
);
923 teardownCollectingSignature (token
);
925 tokenReadNoNewline (token
);
927 parent
= (corkIndex
== CORK_NIL
931 else if (kind
== K_VECTOR
|| kind
== K_LIST
|| kind
== K_DATAFRAME
)
934 parsePair (token
, corkIndex
, NULL
);
939 R_TRACE_TOKEN_TEXT("body", token
, parent
);
941 parseStatement (token
, parent
, false, false);
943 tagEntryInfo
*tag
= getEntryInCorkQueue (corkIndex
);
946 tag
->extensionFields
.endLine
= token
->lineNumber
;
949 tag
->extensionFields
.signature
= vStringDeleteUnwrap(signature
);
952 /* If a vector has no named attribute and it has no lval,
953 * we don't make a tag for the vector. */
954 if ((kind
== K_VECTOR
|| kind
== K_LIST
|| kind
== K_DATAFRAME
)
955 && *assignment_operator
== '\0')
957 bool any_non_placehoders
= false;
958 foreachEntriesInScope (corkIndex
, NULL
,
959 findNonPlaceholder
, &any_non_placehoders
);
960 if (!any_non_placehoders
)
961 tag
->placeholder
= 1;
965 vStringDelete (signature
); /* NULL is acceptable. */
966 eFree (assignment_operator
);
970 /* Parse arguments for library and source. */
971 static bool preParseExternalEntitiy (tokenInfo
*const token
, tokenInfo
*const funcall
)
976 tokenInfo
*prefetch_token
= newRToken ();
978 tokenReadNoNewline (prefetch_token
);
979 if (tokenIsType (prefetch_token
, R_SYMBOL
)
980 || tokenIsType (prefetch_token
, R_STRING
))
982 tokenInfo
*const loaded_obj_token
= newTokenByCopying (prefetch_token
);
983 tokenReadNoNewline (prefetch_token
);
984 if (tokenIsTypeVal (prefetch_token
, ')')
985 || tokenIsTypeVal (prefetch_token
, ','))
987 if (tokenIsTypeVal (prefetch_token
, ')'))
990 makeSimpleRefTag (loaded_obj_token
->string
,
991 (tokenIsKeyword (funcall
, R_LIBRARY
)
994 (tokenIsKeyword (funcall
, R_LIBRARY
)
995 ? (strcmp (tokenString(funcall
), "library") == 0
996 ? R_LIBRARY_ATTACHED_BY_LIBRARY
997 : R_LIBRARY_ATTACHED_BY_REQUIRE
)
998 : R_SOURCE_LOADED_BY_SOURCE
));
999 tokenDelete (loaded_obj_token
);
1001 else if (tokenIsEOF (prefetch_token
))
1003 tokenCopy (token
, prefetch_token
);
1004 tokenDelete (loaded_obj_token
);
1009 tokenUnread (prefetch_token
);
1010 tokenUnread (loaded_obj_token
);
1011 tokenDelete (loaded_obj_token
);
1014 else if (tokenIsEOF (prefetch_token
))
1016 tokenCopy (token
, prefetch_token
);
1020 tokenUnread (prefetch_token
);
1022 tokenDelete (prefetch_token
);
1025 ? "unread tokens and request parsing again to the upper context"
1026 : "parse all arguments");
1030 static bool preParseLoopCounter(tokenInfo
*const token
, int parent
)
1035 tokenReadNoNewline (token
);
1036 if (tokenIsType (token
, R_SYMBOL
))
1037 makeSimpleRTag (token
, parent
, false, K_GLOBALVAR
, NULL
);
1039 if (tokenIsEOF (token
)
1040 || tokenIsTypeVal (token
, ')'))
1044 ? "unread tokens and request parsing again to the upper context"
1045 : "parse all arguments");
1050 /* If funcall is non-NULL, this pair represents the argument list for the function
1051 * call for FUNCALL. */
1052 static void parsePair (tokenInfo
*const token
, int parent
, tokenInfo
*const funcall
)
1056 bool in_continuous_pair
= tokenIsTypeVal (token
, '(')
1057 || tokenIsTypeVal (token
, '[');
1058 bool is_funcall
= funcall
&& tokenIsTypeVal (token
, '(');
1063 if (tokenIsKeyword (funcall
, R_LIBRARY
) ||
1064 tokenIsKeyword (funcall
, R_SOURCE
))
1065 done
= !preParseExternalEntitiy (token
, funcall
);
1066 else if (tokenIsKeyword (funcall
, R_FOR
))
1067 done
= !preParseLoopCounter (token
, parent
);
1068 else if (notifyReadFuncall (funcall
, token
, parent
) != CORK_NIL
)
1081 R_TRACE_TOKEN_TEXT("inside pair", token
, parent
);
1082 parseStatement (token
, parent
, (funcall
!= NULL
), in_continuous_pair
);
1084 while (! (tokenIsEOF (token
)
1085 || tokenIsTypeVal (token
, ')')
1086 || tokenIsTypeVal (token
, '}')
1087 || tokenIsTypeVal (token
, ']')));
1091 static bool isAtConstructorInvocation (void)
1095 tokenInfo
*const token
= newRToken ();
1097 if (tokenIsTypeVal (token
, '('))
1099 tokenUnread (token
);
1100 tokenDelete (token
);
1104 static bool parseStatement (tokenInfo
*const token
, int parent
,
1105 bool in_arglist
, bool in_continuous_pair
)
1108 int last_count
= rTokenInfoClass
.read_counter
;
1112 if (tokenIsEOF (token
))
1114 else if (tokenIsTypeVal (token
, ';'))
1116 R_TRACE_TOKEN_TEXT ("break with ;", token
, parent
);
1119 else if (tokenIsTypeVal (token
, '\n'))
1121 R_TRACE_TOKEN_TEXT ("break with \\n", token
, parent
);
1124 else if ((tokenIsKeyword (token
, R_FUNCTION
)
1125 || ((tokenIsKeyword (token
, R_C
)
1126 || tokenIsKeyword (token
, R_LIST
)
1127 || tokenIsKeyword (token
, R_DATAFRAME
))
1128 && isAtConstructorInvocation ())))
1130 /* This statement doesn't start with a symbol.
1131 * This function is not assigned to any symbol. */
1132 tokenInfo
*const anonfunc
= newTokenByCopying (token
);
1133 int kind
= getKindForToken (token
);
1134 anonGenerate (anonfunc
->string
,
1135 kindExtraInfo
[kind
].anon_prefix
, kind
);
1136 tokenUnread (token
);
1137 vStringClear (token
->string
);
1138 parseRightSide (token
, anonfunc
, parent
);
1139 tokenDelete (anonfunc
);
1141 else if (tokenIsType (token
, R_SYMBOL
)
1142 || tokenIsType (token
, R_STRING
)
1143 || tokenIsType (token
, R_KEYWORD
))
1145 tokenInfo
*const symbol
= newTokenByCopying (token
);
1147 if (in_continuous_pair
)
1148 tokenReadNoNewline (token
);
1152 if (tokenIsType (token
, R_LASSIGN
))
1155 parseRightSide (token
, symbol
, parent
);
1156 R_TRACE_TOKEN_TEXT ("break with right side", token
, parent
);
1157 tokenDelete(symbol
);
1160 else if (tokenIsTypeVal (token
, '='))
1165 /* Ignore the left side symbol. */
1167 R_TRACE_TOKEN_TEXT("(in arg list) after = body", token
, parent
);
1171 parseRightSide (token
, symbol
, parent
);
1172 R_TRACE_TOKEN_TEXT ("break with right side", token
, parent
);
1173 tokenDelete(symbol
);
1177 else if (tokenIsTypeVal (token
, '('))
1180 parsePair (token
, parent
, symbol
);
1182 R_TRACE_TOKEN_TEXT("after arglist", token
, parent
);
1184 else if (tokenIsTypeVal (token
, '$')
1185 || tokenIsTypeVal (token
, '@')
1186 || tokenIsType (token
, R_SCOPE
))
1188 tokenReadNoNewline (token
); /* Skip the next identifier */
1190 R_TRACE_TOKEN_TEXT("after $", token
, parent
);
1193 R_TRACE_TOKEN_TEXT("else after symbol", token
, parent
);
1194 tokenDelete(symbol
);
1196 else if (tokenIsType (token
, R_RASSIGN
))
1198 char *const assignment_operator
= eStrdup (tokenString (token
));
1199 tokenReadNoNewline (token
);
1200 if (tokenIsType (token
, R_SYMBOL
)
1201 || tokenIsType (token
, R_STRING
))
1203 makeSimpleRTag (token
, parent
, false,
1204 K_GLOBALVAR
, assignment_operator
);
1207 eFree (assignment_operator
);
1208 R_TRACE_TOKEN_TEXT("after ->", token
, parent
);
1210 else if (tokenIsType (token
, R_OPERATOR
))
1212 tokenReadNoNewline (token
);
1213 R_TRACE_TOKEN_TEXT("after operator", token
, parent
);
1215 else if (tokenIsTypeVal (token
, '(')
1216 || tokenIsTypeVal (token
, '{')
1217 || tokenIsTypeVal (token
, '['))
1219 parsePair (token
, parent
, NULL
);
1221 R_TRACE_TOKEN_TEXT("after pair", token
, parent
);
1223 else if (tokenIsTypeVal (token
, ')')
1224 || tokenIsTypeVal (token
, '}')
1225 || tokenIsTypeVal (token
, ']'))
1227 R_TRACE_TOKEN_TEXT ("break with close", token
, parent
);
1230 else if (tokenIsTypeVal (token
, '$')
1231 || tokenIsTypeVal (token
, '@')
1232 || tokenIsType (token
, R_SCOPE
))
1234 tokenReadNoNewline (token
); /* Skip the next identifier */
1236 R_TRACE_TOKEN_TEXT("after $", token
, parent
);
1241 R_TRACE_TOKEN_TEXT("else", token
, parent
);
1244 while (!tokenIsEOF (token
));
1248 return (last_count
!= rTokenInfoClass
.read_counter
);
1251 extern bool rParseStatement (tokenInfo
*const token
, int parentIndex
, bool in_arglist
)
1253 pushLanguage (Lang_R
);
1254 bool r
= parseStatement (token
, parentIndex
, in_arglist
, true);
1259 static int notifyReadRightSideSymbol (tokenInfo
*const symbol
,
1260 const char *const assignmentOperator
,
1262 tokenInfo
*const token
)
1267 foreachSubparser (sub
, false)
1269 rSubparser
*rsub
= (rSubparser
*)sub
;
1270 if (rsub
->readRightSideSymbol
)
1272 enterSubparser (sub
);
1273 q
= rsub
->readRightSideSymbol (rsub
, symbol
, assignmentOperator
, parent
, token
);
1283 static int makeSimpleSubparserTag (int langType
,
1284 tokenInfo
*const token
, int parent
,
1285 bool in_func
, int kindInR
,
1286 const char *assignmentOperator
)
1289 subparser
*sub
= getLanguageSubparser (langType
, false);
1292 rSubparser
*rsub
= (rSubparser
*)sub
;
1293 if (rsub
->makeTagWithTranslation
)
1295 enterSubparser (sub
);
1296 q
= rsub
->makeTagWithTranslation (rsub
,
1299 assignmentOperator
);
1306 static bool askSubparserTagAcceptancy (tagEntryInfo
*pe
)
1309 subparser
*sub
= getLanguageSubparser (pe
->langType
, false);
1311 rSubparser
*rsub
= (rSubparser
*)sub
;
1312 if (rsub
->askTagAcceptancy
)
1314 enterSubparser (sub
);
1315 q
= rsub
->askTagAcceptancy (rsub
, pe
);
1322 static bool askSubparserTagHasFunctionAlikeKind (tagEntryInfo
*e
)
1325 pushLanguage (Lang_R
);
1326 subparser
*sub
= getLanguageSubparser (e
->langType
, false);
1329 rSubparser
*rsub
= (rSubparser
*)sub
;
1330 if (rsub
->hasFunctionAlikeKind
)
1332 enterSubparser (sub
);
1333 q
= rsub
->hasFunctionAlikeKind (rsub
, e
);
1339 static int notifyReadFuncall (tokenInfo
*const func
,
1340 tokenInfo
*const token
,
1345 foreachSubparser (sub
, false)
1347 rSubparser
*rsub
= (rSubparser
*)sub
;
1348 if (rsub
->readFuncall
)
1350 enterSubparser (sub
);
1351 q
= rsub
->readFuncall (rsub
, func
, token
, parent
);
/* Top-level driver for the R parser (installed as def->parser in
 * RParser()).
 *
 * A fresh token is created with newRToken(), and a placeholder entry is
 * created with makePlaceholder() and registered; its index is kept in
 * the file-scope blackHoleIndex.  parseStatement() is then called with
 * CORK_NIL as the parent and both flags false; the trailing
 * while-condition shows this repeats until the token reports EOF.
 * Finally every entry scoped under the black-hole placeholder is marked
 * as a placeholder and the token is deleted. */
1360 static void findRTags (void)
1362 tokenInfo
*const token
= newRToken ();
1364 blackHoleIndex
= makePlaceholder ("**BLACK-HOLE/DON'T TAG ME**");
1365 registerEntry (blackHoleIndex
);
1367 TRACE_PRINT ("install blackhole: %d", blackHoleIndex
);
1372 R_TRACE_TOKEN(token
, CORK_NIL
);
1373 parseStatement (token
, CORK_NIL
, false, false);
1375 while (!tokenIsEOF (token
));
/* NOTE(review): the format string below has no conversion for the
 * blackHoleIndex argument, unlike the "install blackhole: %d" trace
 * above, so the index is never printed -- likely an oversight. */
1377 TRACE_PRINT ("run blackhole", blackHoleIndex
);
1378 markAllEntriesInScopeAsPlaceholder (blackHoleIndex
);
1380 tokenDelete (token
);
1383 static void initializeRParser (const langType language
)
1388 extern parserDefinition
*RParser (void)
1390 static const char *const extensions
[] = { "r", "R", "s", "q", NULL
};
1391 parserDefinition
*const def
= parserNew ("R");
1392 static selectLanguage selectors
[] = { selectByArrowOfR
,
1395 def
->extensions
= extensions
;
1396 def
->kindTable
= RKinds
;
1397 def
->kindCount
= ARRAY_SIZE(RKinds
);
1398 def
->fieldTable
= RFields
;
1399 def
->fieldCount
= ARRAY_SIZE (RFields
);
1400 def
->keywordTable
= RKeywordTable
;
1401 def
->keywordCount
= ARRAY_SIZE(RKeywordTable
);
1402 def
->useCork
= CORK_QUEUE
| CORK_SYMTAB
;
1403 def
->parser
= findRTags
;
1404 def
->selectLanguage
= selectors
;
1405 def
->initialize
= initializeRParser
;
1410 extern vString
*rExtractNameFromString (vString
* str
)
1414 if (vStringLength (str
) == 0)
1417 char b
= vStringChar (str
, 0);
1418 if (b
== '\'' || b
== '"' || b
== '`')
1421 if (offset
&& vStringLength (str
) < 3)
1424 vString
*n
= vStringNewInit (vStringValue (str
) + offset
);
1425 if (vStringChar (n
, vStringLength (n
) - 1) == b
)
1432 static const char *tokenTypeStr(enum RTokenType e
)
1433 { /* Generated by misc/enumstr.sh with cmdline:
1434 parsers/r.c RTokenType tokenTypeStr TOKEN_R_ --use-lower-bits-as-is */
1437 case TOKEN_R_EOF
: return "EOF";
1438 case TOKEN_R_UNDEFINED
: return "UNDEFINED";
1439 case TOKEN_R_KEYWORD
: return "KEYWORD";
1440 case TOKEN_R_NEWLINE
: return "NEWLINE";
1441 case TOKEN_R_NUMBER
: return "NUMBER";
1442 case TOKEN_R_SYMBOL
: return "SYMBOL";
1443 case TOKEN_R_STRING
: return "STRING";
1444 case TOKEN_R_OPERATOR
: return "OPERATOR";
1445 case TOKEN_R_DOTS
: return "DOTS";
1446 case TOKEN_R_DOTS_N
: return "DOTS_N";
1447 case TOKEN_R_LASSIGN
: return "LASSIGN";
1448 case TOKEN_R_RASSIGN
: return "RASSIGN";
1449 case TOKEN_R_SCOPE
: return "SCOPE";