2 * Copyright (c) 2016, Szymon Tomasz Stefanek
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for parsing and scanning C++ source files
9 #include "cxx_parser.h"
10 #include "cxx_parser_internal.h"
12 #include "cxx_debug.h"
13 #include "cxx_keyword.h"
14 #include "cxx_token.h"
15 #include "cxx_token_chain.h"
19 #include "../cpreprocessor.h"
27 #define UINFO(c) (((c) < 0x80 && (c) >= 0) ? g_aCharTable[c].uType : 0)
29 static void cxxParserSkipToNonWhiteSpace(void)
31 while(cppIsspace(g_cxx
.iChar
))
32 g_cxx
.iChar
= cppGetc();
37 // Start of an identifier a-z A-Z _ and ~ since
38 // it's part of the destructor name
39 CXXCharTypeStartOfIdentifier
= 1,
40 // Part of identifier a-z A-Z 0-9 _
41 CXXCharTypePartOfIdentifier
= (1 << 1),
43 CXXCharTypeDecimalDigit
= (1 << 2),
44 // A hexadecimal digit
45 CXXCharTypeHexadecimalDigit
= (1 << 3),
46 // Hex digits x X u U l L and .
47 CXXCharTypeValidInNumber
= (1 << 4),
48 // A named single char token.
49 CXXCharTypeNamedSingleCharToken
= (1 << 5),
50 // A named single or repeated char token.
51 CXXCharTypeNamedSingleOrRepeatedCharToken
= (1 << 6),
52 // An operator (we merge them)
53 CXXCharTypeOperator
= (1 << 7),
54 // Full custom handling. Mostly operators or brackets.
55 CXXCharTypeCustomHandling
= (1 << 8)
58 typedef struct _CXXCharTypeData
61 unsigned int uSingleTokenType
;
62 unsigned int uMultiTokenType
;
66 static CXXCharTypeData g_aCharTable
[128] =
122 // 009 (0x09) '\t' HT
128 // 010 (0x0a) '\n' LF
134 // 011 (0x0b) '\v' VT
146 // 013 (0x0d) '\r' CR
286 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
298 CXXCharTypeNamedSingleOrRepeatedCharToken
,
300 CXXTokenTypeMultipleAnds
310 CXXCharTypeNamedSingleCharToken
,
311 CXXTokenTypeOpeningParenthesis
,
316 CXXCharTypeNamedSingleCharToken
,
317 CXXTokenTypeClosingParenthesis
,
322 CXXCharTypeNamedSingleCharToken
,
334 CXXCharTypeNamedSingleCharToken
,
346 CXXCharTypeValidInNumber
| CXXCharTypeNamedSingleOrRepeatedCharToken
,
347 CXXTokenTypeDotOperator
,
348 CXXTokenTypeMultipleDots
358 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
359 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
365 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
366 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
372 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
373 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
379 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
380 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
386 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
387 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
393 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
394 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
400 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
401 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
407 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
408 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
414 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
415 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
421 CXXCharTypePartOfIdentifier
| CXXCharTypeDecimalDigit
|
422 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
428 CXXCharTypeNamedSingleOrRepeatedCharToken
,
429 CXXTokenTypeSingleColon
,
430 CXXTokenTypeMultipleColons
434 CXXCharTypeNamedSingleCharToken
,
435 CXXTokenTypeSemicolon
,
440 CXXCharTypeCustomHandling
,
441 CXXTokenTypeSmallerThanSign
,
446 CXXCharTypeOperator
| CXXCharTypeNamedSingleOrRepeatedCharToken
,
447 CXXTokenTypeAssignment
,
450 // 062 (0x3e) '>' // We never merge two >>
452 CXXCharTypeNamedSingleCharToken
,
453 CXXTokenTypeGreaterThanSign
,
470 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
471 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
477 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
478 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
484 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
485 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
491 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
492 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
498 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
499 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
505 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
506 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
512 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
518 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
519 CXXCharTypeValidInNumber
,
525 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
531 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
537 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
543 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
544 CXXCharTypeValidInNumber
,
550 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
556 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
562 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
568 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
574 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
580 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
586 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
592 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
598 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
599 CXXCharTypeValidInNumber
,
605 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
611 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
617 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
618 CXXCharTypeValidInNumber
,
624 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
630 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
636 CXXCharTypeCustomHandling
,
637 CXXTokenTypeOpeningSquareParenthesis
,
648 CXXCharTypeNamedSingleCharToken
,
649 CXXTokenTypeClosingSquareParenthesis
,
660 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
672 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
673 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
679 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
680 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
686 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
687 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
693 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
694 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
700 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
701 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
707 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
708 CXXCharTypeHexadecimalDigit
| CXXCharTypeValidInNumber
,
714 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
720 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
721 CXXCharTypeValidInNumber
,
727 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
733 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
739 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
745 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
746 CXXCharTypeValidInNumber
,
752 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
758 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
764 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
770 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
776 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
782 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
788 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
794 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
800 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
801 CXXCharTypeValidInNumber
,
807 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
813 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
819 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
|
820 CXXCharTypeValidInNumber
,
826 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
832 CXXCharTypeStartOfIdentifier
| CXXCharTypePartOfIdentifier
,
838 CXXCharTypeNamedSingleCharToken
,
839 CXXTokenTypeOpeningBracket
,
850 CXXCharTypeNamedSingleCharToken
,
851 CXXTokenTypeClosingBracket
,
856 CXXCharTypeStartOfIdentifier
,
864 // Parse the contents of an attribute chain.
865 // The input is the innermost chain of __attribute__((...)) or [[...]]
866 static void cxxParserAnalyzeAttributeChain(CXXTokenChain
* pChain
)
868 CXXToken
* pToken
= cxxTokenChainFirst(pChain
);
872 if(cxxTokenTypeIs(pToken
,CXXTokenTypeIdentifier
))
874 CXX_DEBUG_PRINT("Analyzing attribute %s",vStringValue(pToken
->pszWord
));
876 (strcmp(vStringValue(pToken
->pszWord
),"always_inline") == 0) ||
877 (strcmp(vStringValue(pToken
->pszWord
),"__always_inline__") == 0)
880 CXX_DEBUG_PRINT("Found attribute 'always_inline'");
881 // assume "inline" has been seen.
882 g_cxx
.uKeywordState
|= CXXParserKeywordStateSeenInline
;
884 (strcmp(vStringValue(pToken
->pszWord
),"deprecated") == 0) ||
885 (strcmp(vStringValue(pToken
->pszWord
),"__deprecated__") == 0)
888 CXX_DEBUG_PRINT("Found attribute 'deprecated'");
889 // remember that the "deprecated" attribute has been seen.
890 g_cxx
.uKeywordState
|= CXXParserKeywordStateSeenAttributeDeprecated
;
894 pToken
= pToken
->pNext
;
899 // The __attribute__((...)) sequence complicates parsing quite a lot.
900 // For this reason we attempt to "hide" it from the rest of the parser
901 // at tokenizer level. However, we will not kill it. For extracting interesting
902 // information from the sequence in upper layers, attach the token chain
903 // built from the sequence to the token AROUND the sequence.
904 // In this function, we call the token "attributes owner" token.
905 // CXXToken::pSideChain is the member for attaching.
907 // Returns false if it finds an EOF. This is an important invariant required by
908 // cxxParserParseNextToken(), the only caller.
910 static bool cxxParserParseNextTokenCondenseAttribute(void)
912 // Since cxxParserParseNextToken() returns false only when it has found
913 // an EOF, this function must do the same.
914 // This means that any broken input must be discarded here.
919 cxxTokenTypeIs(g_cxx
.pToken
,CXXTokenTypeKeyword
) &&
920 (cxxKeywordMayDropInTokenizer(g_cxx
.pToken
->eKeyword
)),
921 "This function should be called only after we have parsed __attribute__ or __declspec"
924 CXXToken
* pAttrHead
= cxxTokenChainTakeLast(g_cxx
.pTokenChain
);
928 if(!cxxParserParseNextToken())
930 cxxTokenDestroy(pAttrHead
);
931 CXX_DEBUG_LEAVE_TEXT("No next token after __attribute__");
935 if(!cxxTokenTypeIs(g_cxx
.pToken
,CXXTokenTypeOpeningParenthesis
))
937 cxxTokenDestroy(pAttrHead
);
938 CXX_DEBUG_LEAVE_TEXT("Something that is not an opening parenthesis");
942 // Do NOT accept EOF as a valid terminator as it implies broken input.
943 if(!cxxParserParseAndCondenseCurrentSubchain(
944 CXXTokenTypeOpeningParenthesis
|
945 CXXTokenTypeOpeningSquareParenthesis
|
946 CXXTokenTypeOpeningBracket
,
951 // Parsing and/or condensation of the subchain failed. This implies broken
952 // input (mismatched parenthesis/bracket, early EOF).
954 CXX_DEBUG_LEAVE_TEXT("Failed to parse subchains. The input is broken...");
956 cxxTokenDestroy(pAttrHead
);
958 // However our invariant (see comment at the beginning of the function)
959 // forbids us to return false if we didn't find an EOF. So we attempt
960 // to resume parsing anyway. If there is an EOF, cxxParserParseNextToken()
963 // Kill the token chain
964 cxxTokenChainDestroyLast(g_cxx
.pTokenChain
);
966 return cxxParserParseNextToken();
970 cxxTokenTypeIs(g_cxx
.pToken
,CXXTokenTypeParenthesisChain
),
971 "Should have a parenthesis chain as last token!"
974 // Try to make sense of certain kinds of __attribute__.
975 // the proper syntax is __attribute__(()), so look at the inner chain
977 CXXToken
* pInner
= cxxTokenChainFirst(g_cxx
.pToken
->pChain
);
980 if(pInner
->pNext
&& cxxTokenTypeIs(pInner
->pNext
,CXXTokenTypeParenthesisChain
))
981 cxxParserAnalyzeAttributeChain(pInner
->pNext
->pChain
);
984 CXXToken
* pAttrArgs
= cxxTokenChainTakeLast(g_cxx
.pTokenChain
);
985 CXXToken
* pAttrOwner
= cxxTokenChainLast(g_cxx
.pTokenChain
);
987 // And finally extract yet another token.
988 bool bRet
= cxxParserParseNextToken();
990 if(pAttrOwner
== NULL
991 || cxxTokenTypeIs(pAttrOwner
, CXXTokenTypeComma
)) {
992 // If __attribute__ was at the beginning of the chain,
993 // we cannot attach the __attribute__ side chain to
994 // the previous token.
995 // In that case, we attach the side chain to the
997 pAttrOwner
= g_cxx
.pToken
;
999 // Look up a previous identifier token.
1000 CXXToken
* p
= cxxTokenChainPreviousTokenOfType(pAttrOwner
,
1001 CXXTokenTypeIdentifier
);
1008 if(!pAttrOwner
->pSideChain
)
1009 pAttrOwner
->pSideChain
= cxxTokenChainCreate();
1010 cxxTokenChainAppend(pAttrOwner
->pSideChain
, pAttrHead
);
1011 cxxTokenChainAppend(pAttrOwner
->pSideChain
, pAttrArgs
);
1013 fprintf(stderr
, "pAttrOwner(%s#%d): ",
1014 pAttrOwner
== g_cxx
.pToken
? "next": "prev",
1015 pAttrOwner
->iLineNumber
);
1016 CXX_DEBUG_TOKEN(pAttrOwner
);
1017 fprintf(stderr
, "Side chain: ");
1018 if(pAttrOwner
->pSideChain
)
1019 CXX_DEBUG_CHAIN(pAttrOwner
->pSideChain
);
1021 CXX_DEBUG_PRINT("NULL\n");
1030 // We handle the attribute [[...]] sequence introduced in c++11 in the same way
1031 // as __attribute__((...)). We move it out of the parser's way as it complicates parsing.
1033 // Returns false if it finds an EOF. This is an important invariant required by
1034 // cxxParserParseNextToken(), the only caller.
1036 static bool cxxParserParseNextTokenCondenseCXX11Attribute(void)
1041 cxxTokenTypeIs(g_cxx
.pToken
, CXXTokenTypeOpeningSquareParenthesis
),
1042 "This function should be called only after we have parsed ["
1045 // Input stream: [[...
1046 // If the syntax is correct then this is an attribute sequence [[foo]]
1048 // g_cxx.pToken points to the first '['.
1049 // g_cxx.iChar holds the second '['.
1051 // A caller calls this function only when the second '[' is found.
1053 if(!cxxParserParseAndCondenseCurrentSubchain(
1054 CXXTokenTypeOpeningParenthesis
|
1055 CXXTokenTypeOpeningSquareParenthesis
|
1056 CXXTokenTypeOpeningBracket
,
1061 // Parsing and/or condensation of the subchain failed. This implies broken
1062 // input (mismatched parenthesis/bracket, early EOF).
1064 CXX_DEBUG_LEAVE_TEXT("Failed to parse subchains. The input is broken...");
1066 // However our invariant
1067 // forbids us to return false if we didn't find an EOF. So we attempt
1068 // to resume parsing anyway. If there is an EOF, cxxParserParseNextToken()
1071 // Kill the token chain
1072 cxxTokenChainDestroyLast(g_cxx
.pTokenChain
);
1074 return cxxParserParseNextToken();
1077 // Now the current token should be replaced by a square parenthesis chain
1078 // that contains another square parenthesis chain.
1080 cxxTokenTypeIs(g_cxx
.pToken
,CXXTokenTypeSquareParenthesisChain
),
1081 "Should have a parenthesis chain as last token!"
1084 // at least [ + [*] + ]
1085 (g_cxx
.pToken
->pChain
->iCount
>= 3) &&
1087 cxxTokenChainAt(g_cxx
.pToken
->pChain
,1),
1088 CXXTokenTypeSquareParenthesisChain
1090 "Should have a nested parenthesis chain inside the last token!"
1093 cxxParserAnalyzeAttributeChain(
1094 cxxTokenChainAt(g_cxx
.pToken
->pChain
,1)->pChain
1097 // Now just kill it.
1098 cxxTokenChainDestroyLast(g_cxx
.pTokenChain
);
1100 // And finally extract yet another token.
1101 bool bRet
= cxxParserParseNextToken();
1107 // A macro token was encountered and it expects a parameter list.
1108 // The routine has to check if there is a following parenthesis
1109 // and eventually skip it but it MUST NOT parse the next token
1110 // if it is not a parenthesis. This is because the macro token
1111 // may have a replacement and is that one that has to be returned
1112 // back to the caller from cxxParserParseNextToken().
1113 static bool cxxParserParseNextTokenSkipMacroParenthesis(CXXToken
** ppChain
)
1117 CXX_DEBUG_ASSERT(ppChain
,"ppChain should not be null here");
1119 cxxParserSkipToNonWhiteSpace();
1121 if(g_cxx
.iChar
!= '(')
1124 return true; // no parenthesis
1127 if(!cxxParserParseNextToken())
1129 CXX_DEBUG_LEAVE_TEXT("No next token after ignored identifier");
1133 if(!cxxTokenTypeIs(g_cxx
.pToken
,CXXTokenTypeOpeningParenthesis
))
1135 CXX_DEBUG_ASSERT(false,"Should have found an open parenthesis token here!");
1136 CXX_DEBUG_LEAVE_TEXT("Internal error");
1140 if(!cxxParserParseAndCondenseCurrentSubchain(
1141 CXXTokenTypeOpeningParenthesis
,
1146 CXX_DEBUG_LEAVE_TEXT("Failed to parse and condense subchains");
1151 cxxTokenTypeIs(g_cxx
.pToken
,CXXTokenTypeParenthesisChain
),
1152 "Should have a parenthesis chain as last token!"
1155 // Detach the parenthesis chain from the token chain and hand it to the caller via *ppChain.
1156 *ppChain
= cxxTokenChainTakeLast(g_cxx
.pTokenChain
);
1162 static void cxxParserParseNextTokenApplyReplacement(
1163 cppMacroInfo
* pInfo
,
1164 CXXToken
* pParameterChainToken
1169 CXX_DEBUG_ASSERT(pInfo
,"Info must be not null");
1170 CXX_DEBUG_ASSERT(pInfo
->replacements
,"There should be a replacement");
1172 if(!pInfo
->hasParameterList
)
1174 CXX_DEBUG_ASSERT(!pParameterChainToken
,"This shouldn't have been extracted");
1177 CXXTokenChain
* pParameters
= NULL
;
1178 const char ** aParameters
= NULL
;
1179 int iParameterCount
= 0;
1181 if(pInfo
->hasParameterList
&& pParameterChainToken
&& (pParameterChainToken
->pChain
->iCount
>= 3))
1184 cxxTokenChainDestroyFirst(pParameterChainToken
->pChain
);
1185 cxxTokenChainDestroyLast(pParameterChainToken
->pChain
);
1187 pParameters
= cxxTokenChainSplitOnComma(
1188 pParameterChainToken
->pChain
1191 aParameters
= (const char **)eMalloc(sizeof(const char *) * pParameters
->iCount
);
1192 CXXToken
* pParam
= cxxTokenChainFirst(pParameters
);
1195 aParameters
[iParameterCount
] = vStringValue(pParam
->pszWord
);
1197 pParam
= pParam
->pNext
;
1200 CXX_DEBUG_ASSERT(iParameterCount
== pParameters
->iCount
,"Bad number of parameters found");
1203 vString
* pReplacement
= cppBuildMacroReplacement(pInfo
,aParameters
,iParameterCount
);
1207 cxxTokenChainDestroy(pParameters
);
1208 eFree((char**)aParameters
);
1211 CXX_DEBUG_PRINT("Applying complex replacement '%s'",vStringValue(pReplacement
));
1213 cppUngetStringBuiltByMacro(vStringValue(pReplacement
),vStringLength(pReplacement
), pInfo
);
1215 vStringDelete(pReplacement
);
1220 void cxxParserUngetCurrentToken(void)
1224 g_cxx
.pTokenChain
&&
1225 (g_cxx
.pTokenChain
->iCount
> 0),
1226 "There should be at least one token to unget"
1229 if(g_cxx
.pUngetToken
)
1231 if(g_cxx
.pUngetToken
->bFollowedBySpace
)
1233 cppUngetString(vStringValue(g_cxx
.pUngetToken
->pszWord
),vStringLength(g_cxx
.pUngetToken
->pszWord
));
1234 cxxTokenDestroy(g_cxx
.pUngetToken
);
1237 g_cxx
.pUngetToken
= cxxTokenChainTakeLast(g_cxx
.pTokenChain
);
1239 CXX_DEBUG_ASSERT(g_cxx
.pUngetToken
== g_cxx
.pToken
,"Oops.. ungot a token that was not the chain tail");
1241 g_cxx
.pToken
= cxxTokenChainLast(g_cxx
.pTokenChain
);
1245 #define CXX_PARSER_MAXIMUM_TOKEN_CHAIN_SIZE 16384
1247 // We stop applying macro replacements if the unget buffer gets too big
1248 // as it is a sign of recursive macro expansion
1249 #define CXX_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS 65536
1251 // We stop applying macro replacements if a macro is used so many
1252 // times in a recursive macro expansion.
1253 #define CXX_PARSER_MAXIMUM_MACRO_USE_COUNT 8
1255 // Returns false if it finds an EOF. Returns true otherwise.
1257 // In some special cases this function may parse more than one token,
1258 // however only a single token will always be returned.
1259 bool cxxParserParseNextToken(void)
1261 // The token chain should not be allowed to grow arbitrarily large.
1262 // The token structures are quite big and it's easy to grow up to
1263 // 5-6GB of memory usage. However this limit should be large enough
1264 // to accommodate all the reasonable statements that could have some
1265 // information in them. This includes multiple function prototypes
1266 // in a single statement (ImageMagick has some examples) but probably
1267 // does NOT include large data tables.
1268 int iInitialTokenChainSize
= g_cxx
.pTokenChain
->iCount
;
1269 if(iInitialTokenChainSize
>= CXX_PARSER_MAXIMUM_TOKEN_CHAIN_SIZE
)
1270 cxxTokenChainDestroyLast(g_cxx
.pTokenChain
);
1272 if(g_cxx
.pUngetToken
)
1274 // got some tokens in the unget chain.
1275 cxxTokenChainAppend(g_cxx
.pTokenChain
,g_cxx
.pUngetToken
);
1277 g_cxx
.pToken
= g_cxx
.pUngetToken
;
1279 g_cxx
.pUngetToken
= NULL
;
1281 return !cxxTokenTypeIs(g_cxx
.pToken
,CXXTokenTypeEOF
);
1284 CXXToken
* t
= cxxTokenCreate();
1286 cxxTokenChainAppend(g_cxx
.pTokenChain
,t
);
1290 cxxParserSkipToNonWhiteSpace();
1292 // FIXME: this cpp handling is kind of broken:
1293 // it works only because the moon is in the correct phase.
1294 cppBeginStatement();
1296 // This must be done after getting char from input
1297 t
->iLineNumber
= getInputLineNumber();
1298 t
->oFilePosition
= getInputFilePosition();
1300 if(g_cxx
.iChar
== EOF
)
1302 t
->eType
= CXXTokenTypeEOF
;
1303 t
->bFollowedBySpace
= false;
1307 unsigned int uInfo
= UINFO(g_cxx
.iChar
);
1309 //fprintf(stderr,"Char %c %02x info %u\n",g_cxx.iChar,g_cxx.iChar,uInfo);
1311 if(uInfo
& CXXCharTypeStartOfIdentifier
)
1314 t
->eType
= CXXTokenTypeIdentifier
;
1315 t
->bFollowedBySpace
= false;
1317 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1319 // special case for tilde, which may actually be an operator
1320 if(g_cxx
.iChar
== '~')
1322 // may be followed by space!
1323 g_cxx
.iChar
= cppGetc();
1324 if(cppIsspace(g_cxx
.iChar
))
1326 t
->bFollowedBySpace
= true;
1327 g_cxx
.iChar
= cppGetc();
1328 while(cppIsspace(g_cxx
.iChar
))
1329 g_cxx
.iChar
= cppGetc();
1333 uInfo
= UINFO(g_cxx
.iChar
);
1334 if(!(uInfo
& CXXCharTypeStartOfIdentifier
))
1336 // this is not an identifier after all
1337 t
->eType
= CXXTokenTypeOperator
;
1338 if((!t
->bFollowedBySpace
) && g_cxx
.iChar
== '=')
1340 // make ~= single token so it's not handled as
1341 // a separate assignment
1342 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1343 g_cxx
.iChar
= cppGetc();
1344 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1349 g_cxx
.iChar
= cppGetc();
1354 uInfo
= UINFO(g_cxx
.iChar
);
1355 if(!(uInfo
& CXXCharTypePartOfIdentifier
))
1357 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1358 g_cxx
.iChar
= cppGetc();
1361 int iCXXKeyword
= lookupKeyword(t
->pszWord
->buffer
,g_cxx
.eLangType
);
1362 if(iCXXKeyword
>= 0)
1364 if(cxxKeywordIsDisabled((CXXKeyword
)iCXXKeyword
))
1366 t
->eType
= CXXTokenTypeIdentifier
;
1369 t
->eType
= CXXTokenTypeKeyword
;
1370 t
->eKeyword
= (CXXKeyword
)iCXXKeyword
;
1373 if(cxxKeywordMayDropInTokenizer(iCXXKeyword
))
1375 // special handling for __attribute__ and __declspec
1376 return cxxParserParseNextTokenCondenseAttribute();
1381 cppMacroInfo
* pMacro
= cppFindMacro(vStringValue(t
->pszWord
));
1384 if(pMacro
&& (pMacro
->useCount
>= CXX_PARSER_MAXIMUM_MACRO_USE_COUNT
))
1386 /* If the macro is overly used, report it here. */
1387 CXX_DEBUG_PRINT("Overly uesd macro %s <%p> useCount: %d (> %d)",
1389 pMacro
, pMacro
->useCount
,
1390 CXX_PARSER_MAXIMUM_MACRO_USE_COUNT
);
1394 if(pMacro
&& (pMacro
->useCount
< CXX_PARSER_MAXIMUM_MACRO_USE_COUNT
))
1396 CXX_DEBUG_PRINT("Macro %s <%p> useCount: %d", pMacro
->name
,
1397 pMacro
, pMacro
->useCount
);
1399 cxxTokenChainDestroyLast(g_cxx
.pTokenChain
);
1401 CXXToken
* pParameterChain
= NULL
;
1403 if(pMacro
->hasParameterList
)
1405 CXX_DEBUG_PRINT("Macro has parameter list");
1406 if(!cxxParserParseNextTokenSkipMacroParenthesis(&pParameterChain
))
1410 // This is used to avoid infinite recursion in substitution
1411 // (things like -D foo=foo or similar)
1413 if(pMacro
->replacements
)
1415 CXX_DEBUG_PRINT("The token has replacements: applying");
1418 // Exclude possible cases of recursive macro expansion that
1419 // causes level nesting
1421 (g_cxx
.iNestingLevels
< CXX_PARSER_MAXIMUM_NESTING_LEVELS
) &&
1422 // Exclude possible cases of recursive macro expansion that
1423 // causes a single token chain to grow too big
1425 (iInitialTokenChainSize
< CXX_PARSER_MAXIMUM_TOKEN_CHAIN_SIZE
) &&
1426 // Detect other cases of nasty macro expansion that cause
1427 // the unget buffer to grow fast (but the token chain to grow slowly)
1428 // -D'p=a' -D'a=p+p'
1429 (cppUngetBufferSize() < CXX_PARSER_MAXIMUM_UNGET_BUFFER_SIZE_FOR_MACRO_REPLACEMENTS
)
1433 cppUngetc(g_cxx
.iChar
);
1434 // unget the replacement
1435 cxxParserParseNextTokenApplyReplacement(
1440 g_cxx
.iChar
= cppGetc();
1442 // Possibly a recursive macro
1444 "Token has replacement but either nesting level is too "
1445 "big (%d), the token chain (%d) or the unget buffer (%d) "
1446 "have grown too large",
1447 g_cxx
.iNestingLevels
,
1448 g_cxx
.pTokenChain
->iCount
,
1449 cppUngetBufferSize()
1455 cxxTokenDestroy(pParameterChain
);
1457 g_cxx
.iNestingLevels
++;
1458 // Have no token to return: parse it
1459 CXX_DEBUG_PRINT("Parse inner token");
1460 bool bRet
= cxxParserParseNextToken();
1461 CXX_DEBUG_PRINT("Parsed inner token: %s type %d",g_cxx
.pToken
->pszWord
->buffer
,g_cxx
.pToken
->eType
);
1462 g_cxx
.iNestingLevels
--;
1467 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1472 if(g_cxx
.iChar
== '-')
1474 // special case for pointer
1475 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1476 g_cxx
.iChar
= cppGetc();
1477 if(g_cxx
.iChar
== '>')
1479 t
->eType
= CXXTokenTypePointerOperator
;
1480 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1481 g_cxx
.iChar
= cppGetc();
1483 t
->eType
= CXXTokenTypeOperator
;
1484 if(g_cxx
.iChar
== '-')
1486 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1487 g_cxx
.iChar
= cppGetc();
1490 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1495 // As long as we use cppGetc() we don't need this
1497 if(g_cxx
.iChar
== '"')
1499 // special case for strings
1500 t
->eType
= CXXTokenTypeStringConstant
;
1501 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1502 // We don't even care about storing the other chars: we don't need
1504 // FIXME: We might need them in signature:() tag.. maybe add
1505 // them up to a certain length only?
1508 g_cxx
.iChar
= cppGetc();
1509 if(g_cxx
.iChar
== EOF
)
1511 t
->bFollowedBySpace
= false;
1514 if(g_cxx
.iChar
== '\\')
1517 g_cxx
.iChar
= cppGetc();
1518 if(g_cxx
.iChar
== EOF
)
1520 t
->bFollowedBySpace
= false;
1523 } else if(g_cxx
.iChar
== '"')
1525 g_cxx
.iChar
= cppGetc();
1529 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1533 if(g_cxx
.iChar
== CPP_STRING_SYMBOL
)
1535 t
->eType
= CXXTokenTypeStringConstant
;
1536 cppVStringPut(t
->pszWord
,g_cxx
.iChar
);
1537 g_cxx
.iChar
= cppGetc();
1538 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1544 // As long as we use cppGetc() we don't need this
1545 if(g_cxx
.iChar
== '\'')
1547 // special case for character constants
1548 t
->eType
= CXXTokenTypeCharacterConstant
;
1549 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1550 // We don't even care about storing the other chars: we don't
1551 // need them for parsing
1554 g_cxx
.iChar
= cppGetc();
1555 if(g_cxx
.iChar
== EOF
)
1557 t
->bFollowedBySpace
= false;
1560 if(g_cxx
.iChar
== '\\')
1563 g_cxx
.iChar
= cppGetc();
1564 if(g_cxx
.iChar
== EOF
)
1566 t
->bFollowedBySpace
= false;
1569 } else if(g_cxx
.iChar
== '\'')
1571 g_cxx
.iChar
= cppGetc();
1575 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1579 if(g_cxx
.iChar
== CPP_CHAR_SYMBOL
)
1581 t
->eType
= CXXTokenTypeCharacterConstant
;
1582 cppVStringPut(t
->pszWord
,g_cxx
.iChar
);
1583 g_cxx
.iChar
= cppGetc();
1584 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1589 if(uInfo
& CXXCharTypeDecimalDigit
)
1592 t
->eType
= CXXTokenTypeNumber
;
1593 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1597 g_cxx
.iChar
= cppGetc();
1598 uInfo
= UINFO(g_cxx
.iChar
);
1599 if(!(uInfo
& CXXCharTypeValidInNumber
))
1601 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1604 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1608 if(uInfo
& CXXCharTypeNamedSingleOrRepeatedCharToken
)
1610 t
->eType
= g_aCharTable
[g_cxx
.iChar
].uSingleTokenType
;
1611 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1612 int iChar
= g_cxx
.iChar
;
1613 g_cxx
.iChar
= cppGetc();
1614 if(g_cxx
.iChar
== iChar
)
1616 t
->eType
= g_aCharTable
[g_cxx
.iChar
].uMultiTokenType
;
1617 // We could signal a syntax error with more than two colons
1618 // or equal signs...but we're tolerant
1620 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1621 g_cxx
.iChar
= cppGetc();
1622 } while(g_cxx
.iChar
== iChar
);
1624 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1628 if(uInfo
& CXXCharTypeCustomHandling
)
1630 t
->eType
= g_aCharTable
[g_cxx
.iChar
].uSingleTokenType
;
1631 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1632 g_cxx
.iChar
= cppGetc();
1635 case CXXTokenTypeSmallerThanSign
:
1636 // The < sign is used in templates and is problematic if parsed incorrectly.
1637 // We must extract only the valid operator types: <, <<, <<=, <=, <=>
1642 t
->eType
= CXXTokenTypeOperator
;
1643 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1644 g_cxx
.iChar
= cppGetc();
1645 if(g_cxx
.iChar
== '=')
1648 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1649 g_cxx
.iChar
= cppGetc();
1654 t
->eType
= CXXTokenTypeOperator
;
1655 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1656 g_cxx
.iChar
= cppGetc();
1657 if(g_cxx
.iChar
== '>')
1660 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1661 g_cxx
.iChar
= cppGetc();
1669 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1671 case CXXTokenTypeOpeningSquareParenthesis
:
1672 // special handling for [[ attribute ]] which can appear almost anywhere
1673 // in the source code and is kind of annoying for the parser.
1675 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1677 if(t
->bFollowedBySpace
)
1679 // The tokens can be separated by a space, at least according to gcc.
1681 g_cxx
.iChar
= cppGetc();
1682 } while(cppIsspace(g_cxx
.iChar
));
1685 if(g_cxx
.iChar
== '[')
1686 return cxxParserParseNextTokenCondenseCXX11Attribute();
1689 CXX_DEBUG_ASSERT(false,"There should be a custom handler for this token type");
1690 // treat as single token type in non debug builds
1691 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1698 if(uInfo
& CXXCharTypeNamedSingleCharToken
)
1700 t
->eType
= g_aCharTable
[g_cxx
.iChar
].uSingleTokenType
;
1701 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1702 g_cxx
.iChar
= cppGetc();
1703 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1707 if(uInfo
& CXXCharTypeOperator
)
1709 t
->eType
= CXXTokenTypeOperator
;
1710 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1711 g_cxx
.iChar
= cppGetc();
1712 uInfo
= UINFO(g_cxx
.iChar
);
1713 while(uInfo
& CXXCharTypeOperator
)
1715 vStringPut(t
->pszWord
,g_cxx
.iChar
);
1716 g_cxx
.iChar
= cppGetc();
1717 uInfo
= UINFO(g_cxx
.iChar
);
1719 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);
1723 t
->eType
= CXXTokenTypeUnknown
;
1724 cppVStringPut(t
->pszWord
,g_cxx
.iChar
);
1725 g_cxx
.iChar
= cppGetc();
1726 t
->bFollowedBySpace
= cppIsspace(g_cxx
.iChar
);