[clang-format] Annotate ampamp after new/delete as BinaryOperator (#89033)
[llvm-project.git] / clang / lib / Format / TokenAnnotator.cpp
bloba679683077ac949e38ce55dc388fe9256977535b
1 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a token annotator, i.e. creates
11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
12 ///
13 //===----------------------------------------------------------------------===//
15 #include "TokenAnnotator.h"
16 #include "FormatToken.h"
17 #include "clang/Basic/SourceManager.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/Support/Debug.h"
22 #define DEBUG_TYPE "format-token-annotator"
24 namespace clang {
25 namespace format {
27 static bool mustBreakAfterAttributes(const FormatToken &Tok,
28 const FormatStyle &Style) {
29 switch (Style.BreakAfterAttributes) {
30 case FormatStyle::ABS_Always:
31 return true;
32 case FormatStyle::ABS_Leave:
33 return Tok.NewlinesBefore > 0;
34 default:
35 return false;
39 namespace {
41 /// Returns \c true if the line starts with a token that can start a statement
42 /// with an initializer.
43 static bool startsWithInitStatement(const AnnotatedLine &Line) {
44 return Line.startsWith(tok::kw_for) || Line.startsWith(tok::kw_if) ||
45 Line.startsWith(tok::kw_switch);
48 /// Returns \c true if the token can be used as an identifier in
49 /// an Objective-C \c \@selector, \c false otherwise.
50 ///
51 /// Because getFormattingLangOpts() always lexes source code as
52 /// Objective-C++, C++ keywords like \c new and \c delete are
53 /// lexed as tok::kw_*, not tok::identifier, even for Objective-C.
54 ///
55 /// For Objective-C and Objective-C++, both identifiers and keywords
56 /// are valid inside @selector(...) (or a macro which
57 /// invokes @selector(...)). So, we allow treat any identifier or
58 /// keyword as a potential Objective-C selector component.
59 static bool canBeObjCSelectorComponent(const FormatToken &Tok) {
60 return Tok.Tok.getIdentifierInfo();
63 /// With `Left` being '(', check if we're at either `[...](` or
64 /// `[...]<...>(`, where the [ opens a lambda capture list.
65 static bool isLambdaParameterList(const FormatToken *Left) {
66 // Skip <...> if present.
67 if (Left->Previous && Left->Previous->is(tok::greater) &&
68 Left->Previous->MatchingParen &&
69 Left->Previous->MatchingParen->is(TT_TemplateOpener)) {
70 Left = Left->Previous->MatchingParen;
73 // Check for `[...]`.
74 return Left->Previous && Left->Previous->is(tok::r_square) &&
75 Left->Previous->MatchingParen &&
76 Left->Previous->MatchingParen->is(TT_LambdaLSquare);
79 /// Returns \c true if the token is followed by a boolean condition, \c false
80 /// otherwise.
81 static bool isKeywordWithCondition(const FormatToken &Tok) {
82 return Tok.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
83 tok::kw_constexpr, tok::kw_catch);
86 /// Returns \c true if the token starts a C++ attribute, \c false otherwise.
87 static bool isCppAttribute(bool IsCpp, const FormatToken &Tok) {
88 if (!IsCpp || !Tok.startsSequence(tok::l_square, tok::l_square))
89 return false;
90 // The first square bracket is part of an ObjC array literal
91 if (Tok.Previous && Tok.Previous->is(tok::at))
92 return false;
93 const FormatToken *AttrTok = Tok.Next->Next;
94 if (!AttrTok)
95 return false;
96 // C++17 '[[using ns: foo, bar(baz, blech)]]'
97 // We assume nobody will name an ObjC variable 'using'.
98 if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon))
99 return true;
100 if (AttrTok->isNot(tok::identifier))
101 return false;
102 while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) {
103 // ObjC message send. We assume nobody will use : in a C++11 attribute
104 // specifier parameter, although this is technically valid:
105 // [[foo(:)]].
106 if (AttrTok->is(tok::colon) ||
107 AttrTok->startsSequence(tok::identifier, tok::identifier) ||
108 AttrTok->startsSequence(tok::r_paren, tok::identifier)) {
109 return false;
111 if (AttrTok->is(tok::ellipsis))
112 return true;
113 AttrTok = AttrTok->Next;
115 return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square);
118 /// A parser that gathers additional information about tokens.
120 /// The \c TokenAnnotator tries to match parentheses and square brackets and
121 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
122 /// into template parameter lists.
123 class AnnotatingParser {
124 public:
125 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
126 const AdditionalKeywords &Keywords,
127 SmallVector<ScopeType> &Scopes)
128 : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false),
129 IsCpp(Style.isCpp()), Keywords(Keywords), Scopes(Scopes) {
130 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
131 resetTokenMetadata();
134 private:
135 ScopeType getScopeType(const FormatToken &Token) const {
136 switch (Token.getType()) {
137 case TT_FunctionLBrace:
138 case TT_LambdaLBrace:
139 return ST_Function;
140 case TT_ClassLBrace:
141 case TT_StructLBrace:
142 case TT_UnionLBrace:
143 return ST_Class;
144 default:
145 return ST_Other;
// Tries to parse the tokens following a '<' (the token just before
// CurrentToken) as an angle-bracketed list that ends in a matching '>'.
// On success the two brackets are linked through MatchingParen, the '>' is
// typed as TT_TemplateCloser (or TT_DictLiteral for proto dictionaries), the
// '>' is consumed and true is returned. Returns false when the '<' is judged
// not to open such a list, or when no matching '>' is found.
149 bool parseAngle() {
150 if (!CurrentToken || !CurrentToken->Previous)
151 return false;
// A '<' already recorded in NonTemplateLess is never treated as an opener.
152 if (NonTemplateLess.count(CurrentToken->Previous) > 0)
153 return false;
155 const FormatToken &Previous = *CurrentToken->Previous; // The '<'.
// Give up early when the token to the left of the '<' is a literal, a '}',
// a ')' in a nested context (unless it closes an overloaded-operator paren),
// or 'operator' with a '(' right after the '<'.
156 if (Previous.Previous) {
157 if (Previous.Previous->Tok.isLiteral())
158 return false;
159 if (Previous.Previous->is(tok::r_brace))
160 return false;
161 if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 &&
162 (!Previous.Previous->MatchingParen ||
163 Previous.Previous->MatchingParen->isNot(
164 TT_OverloadedOperatorLParen))) {
165 return false;
167 if (Previous.Previous->is(tok::kw_operator) &&
168 CurrentToken->is(tok::l_paren)) {
169 return false;
173 FormatToken *Left = CurrentToken->Previous;
174 Left->ParentBracket = Contexts.back().ContextKind;
175 ScopedContextCreator ContextCreator(*this, tok::less, 12);
177 // If this angle is in the context of an expression, we need to be more
178 // hesitant to detect it as opening template parameters.
179 bool InExprContext = Contexts.back().IsExpression;
181 Contexts.back().IsExpression = false;
182 // If there's a template keyword before the opening angle bracket, this is a
183 // template parameter, not an argument.
184 if (Left->Previous && Left->Previous->isNot(tok::kw_template))
185 Contexts.back().ContextType = Context::TemplateArgument;
// In Java, skip a '?' that directly follows the '<'.
187 if (Style.Language == FormatStyle::LK_Java &&
188 CurrentToken->is(tok::question)) {
189 next();
192 while (CurrentToken) {
193 if (CurrentToken->is(tok::greater)) {
194 // Try to do a better job at looking for ">>" within the condition of
195 // a statement. Conservatively insert spaces between consecutive ">"
196 // tokens to prevent splitting right bitshift operators and potentially
197 // altering program semantics. This check is overly conservative and
198 // will prevent spaces from being inserted in select nested template
199 // parameter cases, but should not alter program semantics.
200 if (CurrentToken->Next && CurrentToken->Next->is(tok::greater) &&
201 Left->ParentBracket != tok::less &&
202 CurrentToken->getStartOfNonWhitespace() ==
203 CurrentToken->Next->getStartOfNonWhitespace().getLocWithOffset(
204 -1)) {
205 return false;
// Found the closer: link the pair in both directions.
207 Left->MatchingParen = CurrentToken;
208 CurrentToken->MatchingParen = Left;
209 // In TT_Proto, we must distinguish between:
210 // map<key, value>
211 // msg < item: data >
212 // msg: < item: data >
213 // In TT_TextProto, map<key, value> does not occur.
214 if (Style.Language == FormatStyle::LK_TextProto ||
215 (Style.Language == FormatStyle::LK_Proto && Left->Previous &&
216 Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
217 CurrentToken->setType(TT_DictLiteral);
218 } else {
219 CurrentToken->setType(TT_TemplateCloser);
220 CurrentToken->Tok.setLength(1);
// A literal right after the '>' makes this fail instead of closing a list.
222 if (CurrentToken->Next && CurrentToken->Next->Tok.isLiteral())
223 return false;
224 next();
225 return true;
227 if (CurrentToken->is(tok::question) &&
228 Style.Language == FormatStyle::LK_Java) {
229 next();
230 continue;
// A closing bracket of another kind, or (in an expression context, outside
// C# and proto) a ':' or '?', ends the attempt.
232 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) ||
233 (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext &&
234 !Style.isCSharp() && !Style.isProto())) {
235 return false;
237 // If a && or || is found and interpreted as a binary operator, this set
238 // of angles is likely part of something like "a < b && c > d". If the
239 // angles are inside an expression, the ||/&& might also be a binary
240 // operator that was misinterpreted because we are parsing template
241 // parameters.
242 // FIXME: This is getting out of hand, write a decent parser.
243 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
244 CurrentToken->Previous->is(TT_BinaryOperator) &&
245 Contexts[Contexts.size() - 2].IsExpression &&
246 !Line.startsWith(tok::kw_template)) {
247 return false;
249 updateParameterCount(Left, CurrentToken);
// Proto: mark the token before a ':' (or before a '{' / '<' not preceded by
// ':') as a selector name.
250 if (Style.Language == FormatStyle::LK_Proto) {
251 if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) {
252 if (CurrentToken->is(tok::colon) ||
253 (CurrentToken->isOneOf(tok::l_brace, tok::less) &&
254 Previous->isNot(tok::colon))) {
255 Previous->setType(TT_SelectorName);
259 if (Style.isTableGen()) {
260 if (CurrentToken->isOneOf(tok::comma, tok::equal)) {
261 // They appear as separators. Unless they are not in class definition.
262 next();
263 continue;
265 // In angle, there must be Value like tokens. Types are also able to be
266 // parsed in the same way with Values.
267 if (!parseTableGenValue())
268 return false;
269 continue;
271 if (!consumeToken())
272 return false;
// Ran out of tokens without finding a matching '>'.
274 return false;
277 bool parseUntouchableParens() {
278 while (CurrentToken) {
279 CurrentToken->Finalized = true;
280 switch (CurrentToken->Tok.getKind()) {
281 case tok::l_paren:
282 next();
283 if (!parseUntouchableParens())
284 return false;
285 continue;
286 case tok::r_paren:
287 next();
288 return true;
289 default:
290 // no-op
291 break;
293 next();
295 return false;
// Parses a parenthesized group whose '(' is the token just before
// CurrentToken, consuming tokens up to and including the matching ')'.
// Along the way it decides whether the group is an expression context,
// assigns a type to the '(' where one can be inferred (attribute, type
// declaration, function type, ObjC block, ObjC method expression), links the
// two parens through MatchingParen and records the packing kind of the '('.
// LookForDecls enables the "identifier * identifier" binary-operator
// correction described at the top of the main loop.
// Returns true if the matching ')' was found; returns false when there is no
// CurrentToken, on a stray ']' or '}', when a nested parse fails, or when the
// tokens run out.
298 bool parseParens(bool LookForDecls = false) {
299 if (!CurrentToken)
300 return false;
301 assert(CurrentToken->Previous && "Unknown previous token");
302 FormatToken &OpeningParen = *CurrentToken->Previous;
303 assert(OpeningParen.is(tok::l_paren));
304 FormatToken *PrevNonComment = OpeningParen.getPreviousNonComment();
305 OpeningParen.ParentBracket = Contexts.back().ContextKind;
306 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
308 // FIXME: This is a bit of a hack. Do better.
309 Contexts.back().ColonIsForRangeExpr =
310 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
// Arguments of an "untouchable" macro are only marked as finalized.
312 if (OpeningParen.Previous &&
313 OpeningParen.Previous->is(TT_UntouchableMacroFunc)) {
314 OpeningParen.Finalized = true;
315 return parseUntouchableParens();
318 bool StartsObjCMethodExpr = false;
319 if (!Style.isVerilog()) {
320 if (FormatToken *MaybeSel = OpeningParen.Previous) {
321 // @selector( starts a selector.
322 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) &&
323 MaybeSel->Previous && MaybeSel->Previous->is(tok::at)) {
324 StartsObjCMethodExpr = true;
// Decide whether the contents of the parens form an expression, based on the
// type of the '(' and on what precedes it. The first matching case wins.
329 if (OpeningParen.is(TT_OverloadedOperatorLParen)) {
330 // Find the previous kw_operator token.
331 FormatToken *Prev = &OpeningParen;
332 while (Prev->isNot(tok::kw_operator)) {
333 Prev = Prev->Previous;
334 assert(Prev && "Expect a kw_operator prior to the OperatorLParen!");
337 // If faced with "a.operator*(argument)" or "a->operator*(argument)",
338 // i.e. the operator is called as a member function,
339 // then the argument must be an expression.
340 bool OperatorCalledAsMemberFunction =
341 Prev->Previous && Prev->Previous->isOneOf(tok::period, tok::arrow);
342 Contexts.back().IsExpression = OperatorCalledAsMemberFunction;
343 } else if (OpeningParen.is(TT_VerilogInstancePortLParen)) {
344 Contexts.back().IsExpression = true;
345 Contexts.back().ContextType = Context::VerilogInstancePortList;
346 } else if (Style.isJavaScript() &&
347 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
348 Line.startsWith(tok::kw_export, Keywords.kw_type,
349 tok::identifier))) {
350 // type X = (...);
351 // export type X = (...);
352 Contexts.back().IsExpression = false;
353 } else if (OpeningParen.Previous &&
354 (OpeningParen.Previous->isOneOf(
355 tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit,
356 tok::kw_while, tok::l_paren, tok::comma,
357 TT_BinaryOperator) ||
358 OpeningParen.Previous->isIf())) {
359 // static_assert, if and while usually contain expressions.
360 Contexts.back().IsExpression = true;
361 } else if (Style.isJavaScript() && OpeningParen.Previous &&
362 (OpeningParen.Previous->is(Keywords.kw_function) ||
363 (OpeningParen.Previous->endsSequence(tok::identifier,
364 Keywords.kw_function)))) {
365 // function(...) or function f(...)
366 Contexts.back().IsExpression = false;
367 } else if (Style.isJavaScript() && OpeningParen.Previous &&
368 OpeningParen.Previous->is(TT_JsTypeColon)) {
369 // let x: (SomeType);
370 Contexts.back().IsExpression = false;
371 } else if (isLambdaParameterList(&OpeningParen)) {
372 // This is a parameter list of a lambda expression.
373 Contexts.back().IsExpression = false;
374 } else if (OpeningParen.is(TT_RequiresExpressionLParen)) {
375 Contexts.back().IsExpression = false;
376 } else if (OpeningParen.Previous &&
377 OpeningParen.Previous->is(tok::kw__Generic)) {
378 Contexts.back().ContextType = Context::C11GenericSelection;
379 Contexts.back().IsExpression = true;
380 } else if (Line.InPPDirective &&
381 (!OpeningParen.Previous ||
382 OpeningParen.Previous->isNot(tok::identifier))) {
383 Contexts.back().IsExpression = true;
384 } else if (Contexts[Contexts.size() - 2].CaretFound) {
385 // This is the parameter list of an ObjC block.
386 Contexts.back().IsExpression = false;
387 } else if (OpeningParen.Previous &&
388 OpeningParen.Previous->is(TT_ForEachMacro)) {
389 // The first argument to a foreach macro is a declaration.
390 Contexts.back().ContextType = Context::ForEachMacro;
391 Contexts.back().IsExpression = false;
392 } else if (OpeningParen.Previous && OpeningParen.Previous->MatchingParen &&
393 OpeningParen.Previous->MatchingParen->isOneOf(
394 TT_ObjCBlockLParen, TT_FunctionTypeLParen)) {
395 Contexts.back().IsExpression = false;
396 } else if (!Line.MustBeDeclaration && !Line.InPPDirective) {
397 bool IsForOrCatch =
398 OpeningParen.Previous &&
399 OpeningParen.Previous->isOneOf(tok::kw_for, tok::kw_catch);
400 Contexts.back().IsExpression = !IsForOrCatch;
403 if (Style.isTableGen()) {
404 if (FormatToken *Prev = OpeningParen.Previous) {
405 if (Prev->is(TT_TableGenCondOperator)) {
406 Contexts.back().IsTableGenCondOpe = true;
407 Contexts.back().IsExpression = true;
408 } else if (Contexts.size() > 1 &&
409 Contexts[Contexts.size() - 2].IsTableGenBangOpe) {
410 // Hack to handle bang operators. The parent context's flag
411 // was set by parseTableGenSimpleValue().
412 // We have to specify the context outside because the prev of "(" may
413 // be ">", not the bang operator in this case.
414 Contexts.back().IsTableGenBangOpe = true;
415 Contexts.back().IsExpression = true;
416 } else {
417 // Otherwise, this paren seems DAGArg.
418 if (!parseTableGenDAGArg())
419 return false;
420 return parseTableGenDAGArgAndList(&OpeningParen);
425 // Infer the role of the l_paren based on the previous token if we haven't
426 // detected one yet.
427 if (PrevNonComment && OpeningParen.is(TT_Unknown)) {
428 if (PrevNonComment->isAttribute()) {
429 OpeningParen.setType(TT_AttributeLParen);
430 } else if (PrevNonComment->isOneOf(TT_TypenameMacro, tok::kw_decltype,
431 tok::kw_typeof,
432 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait,
433 #include "clang/Basic/TransformTypeTraits.def"
434 tok::kw__Atomic)) {
435 OpeningParen.setType(TT_TypeDeclarationParen);
436 // decltype() and typeof() usually contain expressions.
437 if (PrevNonComment->isOneOf(tok::kw_decltype, tok::kw_typeof))
438 Contexts.back().IsExpression = true;
442 if (StartsObjCMethodExpr) {
443 Contexts.back().ColonIsObjCMethodExpr = true;
444 OpeningParen.setType(TT_ObjCMethodExpr);
447 // MightBeFunctionType and ProbablyFunctionType are used for
448 // function pointer and reference types as well as Objective-C
449 // block types:
451 // void (*FunctionPointer)(void);
452 // void (&FunctionReference)(void);
453 // void (&&FunctionReference)(void);
454 // void (^ObjCBlock)(void);
455 bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression;
456 bool ProbablyFunctionType =
457 CurrentToken->isPointerOrReference() || CurrentToken->is(tok::caret);
458 bool HasMultipleLines = false;
459 bool HasMultipleParametersOnALine = false;
460 bool MightBeObjCForRangeLoop =
461 OpeningParen.Previous && OpeningParen.Previous->is(tok::kw_for);
462 FormatToken *PossibleObjCForInToken = nullptr;
463 while (CurrentToken) {
464 // LookForDecls is set when "if (" has been seen. Check for
465 // 'identifier' '*' 'identifier' followed by not '=' -- this
466 // '*' has to be a binary operator but determineStarAmpUsage() will
467 // categorize it as an unary operator, so set the right type here.
468 if (LookForDecls && CurrentToken->Next) {
469 FormatToken *Prev = CurrentToken->getPreviousNonComment();
470 if (Prev) {
471 FormatToken *PrevPrev = Prev->getPreviousNonComment();
472 FormatToken *Next = CurrentToken->Next;
473 if (PrevPrev && PrevPrev->is(tok::identifier) &&
474 PrevPrev->isNot(TT_TypeName) && Prev->isPointerOrReference() &&
475 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
476 Prev->setType(TT_BinaryOperator);
477 LookForDecls = false;
// A pointer/reference token directly after '(' or '::' hints at a function
// type such as "void (*fp)(void)".
482 if (CurrentToken->Previous->is(TT_PointerOrReference) &&
483 CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
484 tok::coloncolon)) {
485 ProbablyFunctionType = true;
487 if (CurrentToken->is(tok::comma))
488 MightBeFunctionType = false;
489 if (CurrentToken->Previous->is(TT_BinaryOperator))
490 Contexts.back().IsExpression = true;
// Closing paren reached: finish classifying the pair and return.
491 if (CurrentToken->is(tok::r_paren)) {
492 if (OpeningParen.isNot(TT_CppCastLParen) && MightBeFunctionType &&
493 ProbablyFunctionType && CurrentToken->Next &&
494 (CurrentToken->Next->is(tok::l_paren) ||
495 (CurrentToken->Next->is(tok::l_square) &&
496 Line.MustBeDeclaration))) {
497 OpeningParen.setType(OpeningParen.Next->is(tok::caret)
498 ? TT_ObjCBlockLParen
499 : TT_FunctionTypeLParen);
501 OpeningParen.MatchingParen = CurrentToken;
502 CurrentToken->MatchingParen = &OpeningParen;
504 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) &&
505 OpeningParen.Previous && OpeningParen.Previous->is(tok::l_paren)) {
506 // Detect the case where macros are used to generate lambdas or
507 // function bodies, e.g.:
508 // auto my_lambda = MACRO((Type *type, int i) { .. body .. });
509 for (FormatToken *Tok = &OpeningParen; Tok != CurrentToken;
510 Tok = Tok->Next) {
511 if (Tok->is(TT_BinaryOperator) && Tok->isPointerOrReference())
512 Tok->setType(TT_PointerOrReference);
516 if (StartsObjCMethodExpr) {
517 CurrentToken->setType(TT_ObjCMethodExpr);
518 if (Contexts.back().FirstObjCSelectorName) {
519 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
520 Contexts.back().LongestObjCSelectorName;
// Propagate the opener's role to the ')' so both ends carry matching types.
524 if (OpeningParen.is(TT_AttributeLParen))
525 CurrentToken->setType(TT_AttributeRParen);
526 if (OpeningParen.is(TT_TypeDeclarationParen))
527 CurrentToken->setType(TT_TypeDeclarationParen);
528 if (OpeningParen.Previous &&
529 OpeningParen.Previous->is(TT_JavaAnnotation)) {
530 CurrentToken->setType(TT_JavaAnnotation);
532 if (OpeningParen.Previous &&
533 OpeningParen.Previous->is(TT_LeadingJavaAnnotation)) {
534 CurrentToken->setType(TT_LeadingJavaAnnotation);
536 if (OpeningParen.Previous &&
537 OpeningParen.Previous->is(TT_AttributeSquare)) {
538 CurrentToken->setType(TT_AttributeSquare);
// Record how the arguments were laid out in the input.
541 if (!HasMultipleLines)
542 OpeningParen.setPackingKind(PPK_Inconclusive);
543 else if (HasMultipleParametersOnALine)
544 OpeningParen.setPackingKind(PPK_BinPacked);
545 else
546 OpeningParen.setPackingKind(PPK_OnePerLine);
548 next();
549 return true;
// A closing bracket of another kind means the parens are unbalanced.
551 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
552 return false;
554 if (CurrentToken->is(tok::l_brace) && OpeningParen.is(TT_ObjCBlockLParen))
555 OpeningParen.setType(TT_Unknown);
556 if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
557 !CurrentToken->Next->HasUnescapedNewline &&
558 !CurrentToken->Next->isTrailingComment()) {
559 HasMultipleParametersOnALine = true;
561 bool ProbablyFunctionTypeLParen =
562 (CurrentToken->is(tok::l_paren) && CurrentToken->Next &&
563 CurrentToken->Next->isOneOf(tok::star, tok::amp, tok::caret));
// After 'const', 'auto' or a type name the context is treated as a
// declaration, unless a '{' or an ordinary '(' follows.
564 if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) ||
565 CurrentToken->Previous->isTypeName(IsCpp)) &&
566 !(CurrentToken->is(tok::l_brace) ||
567 (CurrentToken->is(tok::l_paren) && !ProbablyFunctionTypeLParen))) {
568 Contexts.back().IsExpression = false;
// A ';' or ':' rules out an ObjC "for (x in y)" loop; undo a tentative
// TT_ObjCForIn annotation.
570 if (CurrentToken->isOneOf(tok::semi, tok::colon)) {
571 MightBeObjCForRangeLoop = false;
572 if (PossibleObjCForInToken) {
573 PossibleObjCForInToken->setType(TT_Unknown);
574 PossibleObjCForInToken = nullptr;
577 if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) {
578 PossibleObjCForInToken = CurrentToken;
579 PossibleObjCForInToken->setType(TT_ObjCForIn);
581 // When we discover a 'new', we set CanBeExpression to 'false' in order to
582 // parse the type correctly. Reset that after a comma.
583 if (CurrentToken->is(tok::comma))
584 Contexts.back().CanBeExpression = true;
586 if (Style.isTableGen()) {
587 if (CurrentToken->is(tok::comma)) {
588 if (Contexts.back().IsTableGenCondOpe)
589 CurrentToken->setType(TT_TableGenCondOperatorComma);
590 next();
591 } else if (CurrentToken->is(tok::colon)) {
592 if (Contexts.back().IsTableGenCondOpe)
593 CurrentToken->setType(TT_TableGenCondOperatorColon);
594 next();
596 // In TableGen there must be Values in parens.
597 if (!parseTableGenValue())
598 return false;
599 continue;
// Remember the token before consuming it; consumeToken() advances
// CurrentToken.
602 FormatToken *Tok = CurrentToken;
603 if (!consumeToken())
604 return false;
605 updateParameterCount(&OpeningParen, Tok);
606 if (CurrentToken && CurrentToken->HasUnescapedNewline)
607 HasMultipleLines = true;
// Ran out of tokens without finding the matching ')'.
609 return false;
612 bool isCSharpAttributeSpecifier(const FormatToken &Tok) {
613 if (!Style.isCSharp())
614 return false;
616 // `identifier[i]` is not an attribute.
617 if (Tok.Previous && Tok.Previous->is(tok::identifier))
618 return false;
620 // Chains of [] in `identifier[i][j][k]` are not attributes.
621 if (Tok.Previous && Tok.Previous->is(tok::r_square)) {
622 auto *MatchingParen = Tok.Previous->MatchingParen;
623 if (!MatchingParen || MatchingParen->is(TT_ArraySubscriptLSquare))
624 return false;
627 const FormatToken *AttrTok = Tok.Next;
628 if (!AttrTok)
629 return false;
631 // Just an empty declaration e.g. string [].
632 if (AttrTok->is(tok::r_square))
633 return false;
635 // Move along the tokens inbetween the '[' and ']' e.g. [STAThread].
636 while (AttrTok && AttrTok->isNot(tok::r_square))
637 AttrTok = AttrTok->Next;
639 if (!AttrTok)
640 return false;
642 // Allow an attribute to be the only content of a file.
643 AttrTok = AttrTok->Next;
644 if (!AttrTok)
645 return true;
647 // Limit this to being an access modifier that follows.
648 if (AttrTok->isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
649 tok::comment, tok::kw_class, tok::kw_static,
650 tok::l_square, Keywords.kw_internal)) {
651 return true;
654 // incase its a [XXX] retval func(....
655 if (AttrTok->Next &&
656 AttrTok->Next->startsSequence(tok::identifier, tok::l_paren)) {
657 return true;
660 return false;
// Parses a bracketed group whose '[' is the token just before CurrentToken,
// consuming tokens up to and including the matching ']'. Classifies the '['
// (structured binding, ObjC method expression, inline-asm symbolic name,
// attribute, JS computed property name, designated initializer, array
// initializer, proto extension or array subscript), links both brackets
// through MatchingParen and maintains the ObjC selector bookkeeping.
// Returns true if the matching ']' was found; returns false when there is no
// CurrentToken, on a stray ')' or '}', when a nested parse fails, or when the
// tokens run out.
663 bool parseSquare() {
664 if (!CurrentToken)
665 return false;
667 // A '[' could be an index subscript (after an identifier or after
668 // ')' or ']'), it could be the start of an Objective-C method
669 // expression, it could be the start of an Objective-C array literal,
670 // or it could be a C++ attribute specifier [[foo::bar]].
671 FormatToken *Left = CurrentToken->Previous;
672 Left->ParentBracket = Contexts.back().ContextKind;
673 FormatToken *Parent = Left->getPreviousNonComment();
675 // Cases where '>' is followed by '['.
676 // In C++, this can happen either in array of templates (foo<int>[10])
677 // or when array is a nested template type (unique_ptr<type1<type2>[]>).
678 bool CppArrayTemplates =
679 IsCpp && Parent && Parent->is(TT_TemplateCloser) &&
680 (Contexts.back().CanBeExpression || Contexts.back().IsExpression ||
681 Contexts.back().ContextType == Context::TemplateArgument);
// The inner '[' of "[[" inherits attribute status from the enclosing context.
683 const bool IsInnerSquare = Contexts.back().InCpp11AttributeSpecifier;
684 const bool IsCpp11AttributeSpecifier =
685 isCppAttribute(IsCpp, *Left) || IsInnerSquare;
687 // Treat C# Attributes [STAThread] much like C++ attributes [[...]].
688 bool IsCSharpAttributeSpecifier =
689 isCSharpAttributeSpecifier(*Left) ||
690 Contexts.back().InCSharpAttributeSpecifier;
692 bool InsideInlineASM = Line.startsWith(tok::kw_asm);
693 bool IsCppStructuredBinding = Left->isCppStructuredBinding(IsCpp);
// An ObjC method expression is assumed only when every other interpretation
// has been ruled out and the preceding token allows an expression to start.
694 bool StartsObjCMethodExpr =
695 !IsCppStructuredBinding && !InsideInlineASM && !CppArrayTemplates &&
696 IsCpp && !IsCpp11AttributeSpecifier && !IsCSharpAttributeSpecifier &&
697 Contexts.back().CanBeExpression && Left->isNot(TT_LambdaLSquare) &&
698 !CurrentToken->isOneOf(tok::l_brace, tok::r_square) &&
699 (!Parent ||
700 Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
701 tok::kw_return, tok::kw_throw) ||
702 Parent->isUnaryOperator() ||
703 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
704 Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) ||
705 (getBinOpPrecedence(Parent->Tok.getKind(), true, true) >
706 prec::Unknown));
707 bool ColonFound = false;
// Assign a type to the '[' (only if it has none yet, except for structured
// bindings). BindingIncrease is handed to the new context created below.
709 unsigned BindingIncrease = 1;
710 if (IsCppStructuredBinding) {
711 Left->setType(TT_StructuredBindingLSquare);
712 } else if (Left->is(TT_Unknown)) {
713 if (StartsObjCMethodExpr) {
714 Left->setType(TT_ObjCMethodExpr);
715 } else if (InsideInlineASM) {
716 Left->setType(TT_InlineASMSymbolicNameLSquare);
717 } else if (IsCpp11AttributeSpecifier) {
718 Left->setType(TT_AttributeSquare);
719 if (!IsInnerSquare && Left->Previous)
720 Left->Previous->EndsCppAttributeGroup = false;
721 } else if (Style.isJavaScript() && Parent &&
722 Contexts.back().ContextKind == tok::l_brace &&
723 Parent->isOneOf(tok::l_brace, tok::comma)) {
724 Left->setType(TT_JsComputedPropertyName);
725 } else if (IsCpp && Contexts.back().ContextKind == tok::l_brace &&
726 Parent && Parent->isOneOf(tok::l_brace, tok::comma)) {
727 Left->setType(TT_DesignatedInitializerLSquare);
728 } else if (IsCSharpAttributeSpecifier) {
729 Left->setType(TT_AttributeSquare);
730 } else if (CurrentToken->is(tok::r_square) && Parent &&
731 Parent->is(TT_TemplateCloser)) {
732 Left->setType(TT_ArraySubscriptLSquare);
733 } else if (Style.isProto()) {
734 // Square braces in LK_Proto can either be message field attributes:
736 // optional Aaa aaa = 1 [
737 // (aaa) = aaa
738 // ];
740 // extensions 123 [
741 // (aaa) = aaa
742 // ];
744 // or text proto extensions (in options):
746 // option (Aaa.options) = {
747 // [type.type/type] {
748 // key: value
749 // }
750 // }
752 // or repeated fields (in options):
754 // option (Aaa.options) = {
755 // keys: [ 1, 2, 3 ]
756 // }
758 // In the first and the third case we want to spread the contents inside
759 // the square braces; in the second we want to keep them inline.
760 Left->setType(TT_ArrayInitializerLSquare);
761 if (!Left->endsSequence(tok::l_square, tok::numeric_constant,
762 tok::equal) &&
763 !Left->endsSequence(tok::l_square, tok::numeric_constant,
764 tok::identifier) &&
765 !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) {
766 Left->setType(TT_ProtoExtensionLSquare);
767 BindingIncrease = 10;
769 } else if (!CppArrayTemplates && Parent &&
770 Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at,
771 tok::comma, tok::l_paren, tok::l_square,
772 tok::question, tok::colon, tok::kw_return,
773 // Should only be relevant to JavaScript:
774 tok::kw_default)) {
775 Left->setType(TT_ArrayInitializerLSquare);
776 } else {
777 BindingIncrease = 10;
778 Left->setType(TT_ArraySubscriptLSquare);
// Open a new context for the bracket contents; they are treated as an
// expression except after a JavaScript type colon.
782 ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease);
783 Contexts.back().IsExpression = true;
784 if (Style.isJavaScript() && Parent && Parent->is(TT_JsTypeColon))
785 Contexts.back().IsExpression = false;
787 Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr;
788 Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier;
789 Contexts.back().InCSharpAttributeSpecifier = IsCSharpAttributeSpecifier;
791 while (CurrentToken) {
// Closing bracket reached: finish classifying the pair and return.
792 if (CurrentToken->is(tok::r_square)) {
793 if (IsCpp11AttributeSpecifier) {
794 CurrentToken->setType(TT_AttributeSquare);
795 if (!IsInnerSquare)
796 CurrentToken->EndsCppAttributeGroup = true;
798 if (IsCSharpAttributeSpecifier) {
799 CurrentToken->setType(TT_AttributeSquare);
800 } else if (((CurrentToken->Next &&
801 CurrentToken->Next->is(tok::l_paren)) ||
802 (CurrentToken->Previous &&
803 CurrentToken->Previous->Previous == Left)) &&
804 Left->is(TT_ObjCMethodExpr)) {
805 // An ObjC method call is rarely followed by an open parenthesis. It
806 // also can't be composed of just one token, unless it's a macro that
807 // will be expanded to more tokens.
808 // FIXME: Do we incorrectly label ":" with this?
809 StartsObjCMethodExpr = false;
810 Left->setType(TT_Unknown);
812 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
813 CurrentToken->setType(TT_ObjCMethodExpr);
814 // If we haven't seen a colon yet, make sure the last identifier
815 // before the r_square is tagged as a selector name component.
816 if (!ColonFound && CurrentToken->Previous &&
817 CurrentToken->Previous->is(TT_Unknown) &&
818 canBeObjCSelectorComponent(*CurrentToken->Previous)) {
819 CurrentToken->Previous->setType(TT_SelectorName);
821 // determineStarAmpUsage() thinks that '*' '[' is allocating an
822 // array of pointers, but if '[' starts a selector then '*' is a
823 // binary operator.
824 if (Parent && Parent->is(TT_PointerOrReference))
825 Parent->overwriteFixedType(TT_BinaryOperator);
827 // An arrow after an ObjC method expression is not a lambda arrow.
828 if (CurrentToken->is(TT_ObjCMethodExpr) && CurrentToken->Next &&
829 CurrentToken->Next->is(TT_TrailingReturnArrow)) {
830 CurrentToken->Next->overwriteFixedType(TT_Unknown);
832 Left->MatchingParen = CurrentToken;
833 CurrentToken->MatchingParen = Left;
834 // FirstObjCSelectorName is set when a colon is found. This does
835 // not work, however, when the method has no parameters.
836 // Here, we set FirstObjCSelectorName when the end of the method call is
837 // reached, in case it was not set already.
838 if (!Contexts.back().FirstObjCSelectorName) {
839 FormatToken *Previous = CurrentToken->getPreviousNonComment();
840 if (Previous && Previous->is(TT_SelectorName)) {
841 Previous->ObjCSelectorNameParts = 1;
842 Contexts.back().FirstObjCSelectorName = Previous;
844 } else {
845 Left->ParameterCount =
846 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
848 if (Contexts.back().FirstObjCSelectorName) {
849 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
850 Contexts.back().LongestObjCSelectorName;
851 if (Left->BlockParameterCount > 1)
852 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
854 if (Style.isTableGen() && Left->is(TT_TableGenListOpener))
855 CurrentToken->setType(TT_TableGenListCloser);
856 next();
857 return true;
// A closing bracket of another kind means the brackets are unbalanced.
859 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
860 return false;
861 if (CurrentToken->is(tok::colon)) {
862 if (IsCpp11AttributeSpecifier &&
863 CurrentToken->endsSequence(tok::colon, tok::identifier,
864 tok::kw_using)) {
865 // Remember that this is a [[using ns: foo]] C++ attribute, so we
866 // don't add a space before the colon (unlike other colons).
867 CurrentToken->setType(TT_AttributeColon);
868 } else if (!Style.isVerilog() && !Line.InPragmaDirective &&
869 Left->isOneOf(TT_ArraySubscriptLSquare,
870 TT_DesignatedInitializerLSquare)) {
// A ':' inside what was taken for a subscript or designated initializer
// reclassifies the bracket as an ObjC method expression.
871 Left->setType(TT_ObjCMethodExpr);
872 StartsObjCMethodExpr = true;
873 Contexts.back().ColonIsObjCMethodExpr = true;
874 if (Parent && Parent->is(tok::r_paren)) {
875 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
876 Parent->setType(TT_CastRParen);
879 ColonFound = true;
// A ',' before any ':' means this is an array initializer rather than an
// ObjC method expression.
881 if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) &&
882 !ColonFound) {
883 Left->setType(TT_ArrayInitializerLSquare);
// Remember the token before consuming it; the calls below advance
// CurrentToken.
885 FormatToken *Tok = CurrentToken;
886 if (Style.isTableGen()) {
887 if (CurrentToken->isOneOf(tok::comma, tok::minus, tok::ellipsis)) {
888 // '-' and '...' appears as a separator in slice.
889 next();
890 } else {
891 // In TableGen there must be a list of Values in square brackets.
892 // It must be ValueList or SliceElements.
893 if (!parseTableGenValue())
894 return false;
896 updateParameterCount(Left, Tok);
897 continue;
899 if (!consumeToken())
900 return false;
901 updateParameterCount(Left, Tok);
// Ran out of tokens without finding the matching ']'.
903 return false;
906 void skipToNextNonComment() {
907 next();
908 while (CurrentToken && CurrentToken->is(tok::comment))
909 next();
912 // Simplified parser for TableGen Value. Returns true on success.
913 // It consists of SimpleValues, SimpleValues with Suffixes, and Value followed
914 // by '#', paste operator.
915 // There also exists the case the Value is parsed as NameValue.
916 // In this case, the Value ends if '{' is found.
917 bool parseTableGenValue(bool ParseNameMode = false) {
918 if (!CurrentToken)
919 return false;
920 while (CurrentToken->is(tok::comment))
921 next();
922 if (!parseTableGenSimpleValue())
923 return false;
924 if (!CurrentToken)
925 return true;
926 // Value "#" [Value]
927 if (CurrentToken->is(tok::hash)) {
928 if (CurrentToken->Next &&
929 CurrentToken->Next->isOneOf(tok::colon, tok::semi, tok::l_brace)) {
930 // Trailing paste operator.
931 // These are only the allowed cases in TGParser::ParseValue().
932 CurrentToken->setType(TT_TableGenTrailingPasteOperator);
933 next();
934 return true;
936 FormatToken *HashTok = CurrentToken;
937 skipToNextNonComment();
938 HashTok->setType(TT_Unknown);
939 if (!parseTableGenValue(ParseNameMode))
940 return false;
942 // In name mode, '{' is regarded as the end of the value.
943 // See TGParser::ParseValue in TGParser.cpp
944 if (ParseNameMode && CurrentToken->is(tok::l_brace))
945 return true;
946 // These tokens indicates this is a value with suffixes.
947 if (CurrentToken->isOneOf(tok::l_brace, tok::l_square, tok::period)) {
948 CurrentToken->setType(TT_TableGenValueSuffix);
949 FormatToken *Suffix = CurrentToken;
950 skipToNextNonComment();
951 if (Suffix->is(tok::l_square))
952 return parseSquare();
953 if (Suffix->is(tok::l_brace)) {
954 Scopes.push_back(getScopeType(*Suffix));
955 return parseBrace();
958 return true;
961 // TokVarName ::= "$" ualpha (ualpha | "0"..."9")*
962 // Appears as a part of DagArg.
963 // This does not change the current token on fail.
964 bool tryToParseTableGenTokVar() {
965 if (!CurrentToken)
966 return false;
967 if (CurrentToken->is(tok::identifier) &&
968 CurrentToken->TokenText.front() == '$') {
969 skipToNextNonComment();
970 return true;
972 return false;
975 // DagArg ::= Value [":" TokVarName] | TokVarName
976 // Appears as a part of SimpleValue6.
977 bool parseTableGenDAGArg(bool AlignColon = false) {
978 if (tryToParseTableGenTokVar())
979 return true;
980 if (parseTableGenValue()) {
981 if (CurrentToken && CurrentToken->is(tok::colon)) {
982 if (AlignColon)
983 CurrentToken->setType(TT_TableGenDAGArgListColonToAlign);
984 else
985 CurrentToken->setType(TT_TableGenDAGArgListColon);
986 skipToNextNonComment();
987 return tryToParseTableGenTokVar();
989 return true;
991 return false;
994 // Judge if the token is a operator ID to insert line break in DAGArg.
995 // That is, TableGenBreakingDAGArgOperators is empty (by the definition of the
996 // option) or the token is in the list.
997 bool isTableGenDAGArgBreakingOperator(const FormatToken &Tok) {
998 auto &Opes = Style.TableGenBreakingDAGArgOperators;
999 // If the list is empty, all operators are breaking operators.
1000 if (Opes.empty())
1001 return true;
1002 // Otherwise, the operator is limited to normal identifiers.
1003 if (Tok.isNot(tok::identifier) ||
1004 Tok.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
1005 return false;
1007 // The case next is colon, it is not a operator of identifier.
1008 if (!Tok.Next || Tok.Next->is(tok::colon))
1009 return false;
1010 return std::find(Opes.begin(), Opes.end(), Tok.TokenText.str()) !=
1011 Opes.end();
1014 // SimpleValue6 ::= "(" DagArg [DagArgList] ")"
1015 // This parses SimpleValue 6's inside part of "(" ")"
1016 bool parseTableGenDAGArgAndList(FormatToken *Opener) {
1017 FormatToken *FirstTok = CurrentToken;
1018 if (!parseTableGenDAGArg())
1019 return false;
1020 bool BreakInside = false;
1021 if (Style.TableGenBreakInsideDAGArg != FormatStyle::DAS_DontBreak) {
1022 // Specialized detection for DAGArgOperator, that determines the way of
1023 // line break for this DAGArg elements.
1024 if (isTableGenDAGArgBreakingOperator(*FirstTok)) {
1025 // Special case for identifier DAGArg operator.
1026 BreakInside = true;
1027 Opener->setType(TT_TableGenDAGArgOpenerToBreak);
1028 if (FirstTok->isOneOf(TT_TableGenBangOperator,
1029 TT_TableGenCondOperator)) {
1030 // Special case for bang/cond operators. Set the whole operator as
1031 // the DAGArg operator. Always break after it.
1032 CurrentToken->Previous->setType(TT_TableGenDAGArgOperatorToBreak);
1033 } else if (FirstTok->is(tok::identifier)) {
1034 if (Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll)
1035 FirstTok->setType(TT_TableGenDAGArgOperatorToBreak);
1036 else
1037 FirstTok->setType(TT_TableGenDAGArgOperatorID);
1041 // Parse the [DagArgList] part
1042 bool FirstDAGArgListElm = true;
1043 while (CurrentToken) {
1044 if (!FirstDAGArgListElm && CurrentToken->is(tok::comma)) {
1045 CurrentToken->setType(BreakInside ? TT_TableGenDAGArgListCommaToBreak
1046 : TT_TableGenDAGArgListComma);
1047 skipToNextNonComment();
1049 if (CurrentToken && CurrentToken->is(tok::r_paren)) {
1050 CurrentToken->setType(TT_TableGenDAGArgCloser);
1051 Opener->MatchingParen = CurrentToken;
1052 CurrentToken->MatchingParen = Opener;
1053 skipToNextNonComment();
1054 return true;
1056 if (!parseTableGenDAGArg(
1057 BreakInside &&
1058 Style.AlignConsecutiveTableGenBreakingDAGArgColons.Enabled)) {
1059 return false;
1061 FirstDAGArgListElm = false;
1063 return false;
1066 bool parseTableGenSimpleValue() {
1067 assert(Style.isTableGen());
1068 if (!CurrentToken)
1069 return false;
1070 FormatToken *Tok = CurrentToken;
1071 skipToNextNonComment();
1072 // SimpleValue 1, 2, 3: Literals
1073 if (Tok->isOneOf(tok::numeric_constant, tok::string_literal,
1074 TT_TableGenMultiLineString, tok::kw_true, tok::kw_false,
1075 tok::question, tok::kw_int)) {
1076 return true;
1078 // SimpleValue 4: ValueList, Type
1079 if (Tok->is(tok::l_brace)) {
1080 Scopes.push_back(getScopeType(*Tok));
1081 return parseBrace();
1083 // SimpleValue 5: List initializer
1084 if (Tok->is(tok::l_square)) {
1085 Tok->setType(TT_TableGenListOpener);
1086 if (!parseSquare())
1087 return false;
1088 if (Tok->is(tok::less)) {
1089 CurrentToken->setType(TT_TemplateOpener);
1090 return parseAngle();
1092 return true;
1094 // SimpleValue 6: DAGArg [DAGArgList]
1095 // SimpleValue6 ::= "(" DagArg [DagArgList] ")"
1096 if (Tok->is(tok::l_paren)) {
1097 Tok->setType(TT_TableGenDAGArgOpener);
1098 return parseTableGenDAGArgAndList(Tok);
1100 // SimpleValue 9: Bang operator
1101 if (Tok->is(TT_TableGenBangOperator)) {
1102 if (CurrentToken && CurrentToken->is(tok::less)) {
1103 CurrentToken->setType(TT_TemplateOpener);
1104 skipToNextNonComment();
1105 if (!parseAngle())
1106 return false;
1108 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1109 return false;
1110 skipToNextNonComment();
1111 // FIXME: Hack using inheritance to child context
1112 Contexts.back().IsTableGenBangOpe = true;
1113 bool Result = parseParens();
1114 Contexts.back().IsTableGenBangOpe = false;
1115 return Result;
1117 // SimpleValue 9: Cond operator
1118 if (Tok->is(TT_TableGenCondOperator)) {
1119 Tok = CurrentToken;
1120 skipToNextNonComment();
1121 if (!Tok || Tok->isNot(tok::l_paren))
1122 return false;
1123 bool Result = parseParens();
1124 return Result;
1126 // We have to check identifier at the last because the kind of bang/cond
1127 // operators are also identifier.
1128 // SimpleValue 7: Identifiers
1129 if (Tok->is(tok::identifier)) {
1130 // SimpleValue 8: Anonymous record
1131 if (CurrentToken && CurrentToken->is(tok::less)) {
1132 CurrentToken->setType(TT_TemplateOpener);
1133 skipToNextNonComment();
1134 return parseAngle();
1136 return true;
1139 return false;
1142 bool couldBeInStructArrayInitializer() const {
1143 if (Contexts.size() < 2)
1144 return false;
1145 // We want to back up no more then 2 context levels i.e.
1146 // . { { <-
1147 const auto End = std::next(Contexts.rbegin(), 2);
1148 auto Last = Contexts.rbegin();
1149 unsigned Depth = 0;
1150 for (; Last != End; ++Last)
1151 if (Last->ContextKind == tok::l_brace)
1152 ++Depth;
1153 return Depth == 2 && Last->ContextKind != tok::l_brace;
1156 bool parseBrace() {
1157 if (!CurrentToken)
1158 return true;
1160 assert(CurrentToken->Previous);
1161 FormatToken &OpeningBrace = *CurrentToken->Previous;
1162 assert(OpeningBrace.is(tok::l_brace));
1163 OpeningBrace.ParentBracket = Contexts.back().ContextKind;
1165 if (Contexts.back().CaretFound)
1166 OpeningBrace.overwriteFixedType(TT_ObjCBlockLBrace);
1167 Contexts.back().CaretFound = false;
1169 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
1170 Contexts.back().ColonIsDictLiteral = true;
1171 if (OpeningBrace.is(BK_BracedInit))
1172 Contexts.back().IsExpression = true;
1173 if (Style.isJavaScript() && OpeningBrace.Previous &&
1174 OpeningBrace.Previous->is(TT_JsTypeColon)) {
1175 Contexts.back().IsExpression = false;
1177 if (Style.isVerilog() &&
1178 (!OpeningBrace.getPreviousNonComment() ||
1179 OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
1180 Contexts.back().VerilogMayBeConcatenation = true;
1182 if (Style.isTableGen())
1183 Contexts.back().ColonIsDictLiteral = false;
1185 unsigned CommaCount = 0;
1186 while (CurrentToken) {
1187 if (CurrentToken->is(tok::r_brace)) {
1188 assert(!Scopes.empty());
1189 assert(Scopes.back() == getScopeType(OpeningBrace));
1190 Scopes.pop_back();
1191 assert(OpeningBrace.Optional == CurrentToken->Optional);
1192 OpeningBrace.MatchingParen = CurrentToken;
1193 CurrentToken->MatchingParen = &OpeningBrace;
1194 if (Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
1195 if (OpeningBrace.ParentBracket == tok::l_brace &&
1196 couldBeInStructArrayInitializer() && CommaCount > 0) {
1197 Contexts.back().ContextType = Context::StructArrayInitializer;
1200 next();
1201 return true;
1203 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
1204 return false;
1205 updateParameterCount(&OpeningBrace, CurrentToken);
1206 if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) {
1207 FormatToken *Previous = CurrentToken->getPreviousNonComment();
1208 if (Previous->is(TT_JsTypeOptionalQuestion))
1209 Previous = Previous->getPreviousNonComment();
1210 if ((CurrentToken->is(tok::colon) && !Style.isTableGen() &&
1211 (!Contexts.back().ColonIsDictLiteral || !IsCpp)) ||
1212 Style.isProto()) {
1213 OpeningBrace.setType(TT_DictLiteral);
1214 if (Previous->Tok.getIdentifierInfo() ||
1215 Previous->is(tok::string_literal)) {
1216 Previous->setType(TT_SelectorName);
1219 if (CurrentToken->is(tok::colon) && OpeningBrace.is(TT_Unknown) &&
1220 !Style.isTableGen()) {
1221 OpeningBrace.setType(TT_DictLiteral);
1222 } else if (Style.isJavaScript()) {
1223 OpeningBrace.overwriteFixedType(TT_DictLiteral);
1226 if (CurrentToken->is(tok::comma)) {
1227 if (Style.isJavaScript())
1228 OpeningBrace.overwriteFixedType(TT_DictLiteral);
1229 ++CommaCount;
1231 if (!consumeToken())
1232 return false;
1234 return true;
1237 void updateParameterCount(FormatToken *Left, FormatToken *Current) {
1238 // For ObjC methods, the number of parameters is calculated differently as
1239 // method declarations have a different structure (the parameters are not
1240 // inside a bracket scope).
1241 if (Current->is(tok::l_brace) && Current->is(BK_Block))
1242 ++Left->BlockParameterCount;
1243 if (Current->is(tok::comma)) {
1244 ++Left->ParameterCount;
1245 if (!Left->Role)
1246 Left->Role.reset(new CommaSeparatedList(Style));
1247 Left->Role->CommaFound(Current);
1248 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
1249 Left->ParameterCount = 1;
1253 bool parseConditional() {
1254 while (CurrentToken) {
1255 if (CurrentToken->is(tok::colon)) {
1256 CurrentToken->setType(TT_ConditionalExpr);
1257 next();
1258 return true;
1260 if (!consumeToken())
1261 return false;
1263 return false;
1266 bool parseTemplateDeclaration() {
1267 if (CurrentToken && CurrentToken->is(tok::less)) {
1268 CurrentToken->setType(TT_TemplateOpener);
1269 next();
1270 if (!parseAngle())
1271 return false;
1272 if (CurrentToken)
1273 CurrentToken->Previous->ClosesTemplateDeclaration = true;
1274 return true;
1276 return false;
1279 bool consumeToken() {
1280 if (IsCpp) {
1281 const auto *Prev = CurrentToken->getPreviousNonComment();
1282 if (Prev && Prev->is(tok::r_square) && Prev->is(TT_AttributeSquare) &&
1283 CurrentToken->isOneOf(tok::kw_if, tok::kw_switch, tok::kw_case,
1284 tok::kw_default, tok::kw_for, tok::kw_while) &&
1285 mustBreakAfterAttributes(*CurrentToken, Style)) {
1286 CurrentToken->MustBreakBefore = true;
1289 FormatToken *Tok = CurrentToken;
1290 next();
1291 // In Verilog primitives' state tables, `:`, `?`, and `-` aren't normal
1292 // operators.
1293 if (Tok->is(TT_VerilogTableItem))
1294 return true;
1295 // Multi-line string itself is a single annotated token.
1296 if (Tok->is(TT_TableGenMultiLineString))
1297 return true;
1298 switch (Tok->Tok.getKind()) {
1299 case tok::plus:
1300 case tok::minus:
1301 if (!Tok->Previous && Line.MustBeDeclaration)
1302 Tok->setType(TT_ObjCMethodSpecifier);
1303 break;
1304 case tok::colon:
1305 if (!Tok->Previous)
1306 return false;
1307 // Goto labels and case labels are already identified in
1308 // UnwrappedLineParser.
1309 if (Tok->isTypeFinalized())
1310 break;
1311 // Colons from ?: are handled in parseConditional().
1312 if (Style.isJavaScript()) {
1313 if (Contexts.back().ColonIsForRangeExpr || // colon in for loop
1314 (Contexts.size() == 1 && // switch/case labels
1315 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) ||
1316 Contexts.back().ContextKind == tok::l_paren || // function params
1317 Contexts.back().ContextKind == tok::l_square || // array type
1318 (!Contexts.back().IsExpression &&
1319 Contexts.back().ContextKind == tok::l_brace) || // object type
1320 (Contexts.size() == 1 &&
1321 Line.MustBeDeclaration)) { // method/property declaration
1322 Contexts.back().IsExpression = false;
1323 Tok->setType(TT_JsTypeColon);
1324 break;
1326 } else if (Style.isCSharp()) {
1327 if (Contexts.back().InCSharpAttributeSpecifier) {
1328 Tok->setType(TT_AttributeColon);
1329 break;
1331 if (Contexts.back().ContextKind == tok::l_paren) {
1332 Tok->setType(TT_CSharpNamedArgumentColon);
1333 break;
1335 } else if (Style.isVerilog() && Tok->isNot(TT_BinaryOperator)) {
1336 // The distribution weight operators are labeled
1337 // TT_BinaryOperator by the lexer.
1338 if (Keywords.isVerilogEnd(*Tok->Previous) ||
1339 Keywords.isVerilogBegin(*Tok->Previous)) {
1340 Tok->setType(TT_VerilogBlockLabelColon);
1341 } else if (Contexts.back().ContextKind == tok::l_square) {
1342 Tok->setType(TT_BitFieldColon);
1343 } else if (Contexts.back().ColonIsDictLiteral) {
1344 Tok->setType(TT_DictLiteral);
1345 } else if (Contexts.size() == 1) {
1346 // In Verilog a case label doesn't have the case keyword. We
1347 // assume a colon following an expression is a case label.
1348 // Colons from ?: are annotated in parseConditional().
1349 Tok->setType(TT_CaseLabelColon);
1350 if (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))
1351 --Line.Level;
1353 break;
1355 if (Line.First->isOneOf(Keywords.kw_module, Keywords.kw_import) ||
1356 Line.First->startsSequence(tok::kw_export, Keywords.kw_module) ||
1357 Line.First->startsSequence(tok::kw_export, Keywords.kw_import)) {
1358 Tok->setType(TT_ModulePartitionColon);
1359 } else if (Contexts.back().ColonIsDictLiteral || Style.isProto()) {
1360 Tok->setType(TT_DictLiteral);
1361 if (Style.Language == FormatStyle::LK_TextProto) {
1362 if (FormatToken *Previous = Tok->getPreviousNonComment())
1363 Previous->setType(TT_SelectorName);
1365 } else if (Contexts.back().ColonIsObjCMethodExpr ||
1366 Line.startsWith(TT_ObjCMethodSpecifier)) {
1367 Tok->setType(TT_ObjCMethodExpr);
1368 const FormatToken *BeforePrevious = Tok->Previous->Previous;
1369 // Ensure we tag all identifiers in method declarations as
1370 // TT_SelectorName.
1371 bool UnknownIdentifierInMethodDeclaration =
1372 Line.startsWith(TT_ObjCMethodSpecifier) &&
1373 Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown);
1374 if (!BeforePrevious ||
1375 // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen.
1376 !(BeforePrevious->is(TT_CastRParen) ||
1377 (BeforePrevious->is(TT_ObjCMethodExpr) &&
1378 BeforePrevious->is(tok::colon))) ||
1379 BeforePrevious->is(tok::r_square) ||
1380 Contexts.back().LongestObjCSelectorName == 0 ||
1381 UnknownIdentifierInMethodDeclaration) {
1382 Tok->Previous->setType(TT_SelectorName);
1383 if (!Contexts.back().FirstObjCSelectorName) {
1384 Contexts.back().FirstObjCSelectorName = Tok->Previous;
1385 } else if (Tok->Previous->ColumnWidth >
1386 Contexts.back().LongestObjCSelectorName) {
1387 Contexts.back().LongestObjCSelectorName =
1388 Tok->Previous->ColumnWidth;
1390 Tok->Previous->ParameterIndex =
1391 Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1392 ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts;
1394 } else if (Contexts.back().ColonIsForRangeExpr) {
1395 Tok->setType(TT_RangeBasedForLoopColon);
1396 } else if (Contexts.back().ContextType == Context::C11GenericSelection) {
1397 Tok->setType(TT_GenericSelectionColon);
1398 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
1399 Tok->setType(TT_BitFieldColon);
1400 } else if (Contexts.size() == 1 &&
1401 !Line.First->isOneOf(tok::kw_enum, tok::kw_case,
1402 tok::kw_default)) {
1403 FormatToken *Prev = Tok->getPreviousNonComment();
1404 if (!Prev)
1405 break;
1406 if (Prev->isOneOf(tok::r_paren, tok::kw_noexcept) ||
1407 Prev->ClosesRequiresClause) {
1408 Tok->setType(TT_CtorInitializerColon);
1409 } else if (Prev->is(tok::kw_try)) {
1410 // Member initializer list within function try block.
1411 FormatToken *PrevPrev = Prev->getPreviousNonComment();
1412 if (!PrevPrev)
1413 break;
1414 if (PrevPrev && PrevPrev->isOneOf(tok::r_paren, tok::kw_noexcept))
1415 Tok->setType(TT_CtorInitializerColon);
1416 } else {
1417 Tok->setType(TT_InheritanceColon);
1419 } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next &&
1420 (Tok->Next->isOneOf(tok::r_paren, tok::comma) ||
1421 (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next &&
1422 Tok->Next->Next->is(tok::colon)))) {
1423 // This handles a special macro in ObjC code where selectors including
1424 // the colon are passed as macro arguments.
1425 Tok->setType(TT_ObjCMethodExpr);
1426 } else if (Contexts.back().ContextKind == tok::l_paren &&
1427 !Line.InPragmaDirective) {
1428 if (Style.isTableGen() && Contexts.back().IsTableGenDAGArg) {
1429 Tok->setType(TT_TableGenDAGArgListColon);
1430 break;
1432 Tok->setType(TT_InlineASMColon);
1434 break;
1435 case tok::pipe:
1436 case tok::amp:
1437 // | and & in declarations/type expressions represent union and
1438 // intersection types, respectively.
1439 if (Style.isJavaScript() && !Contexts.back().IsExpression)
1440 Tok->setType(TT_JsTypeOperator);
1441 break;
1442 case tok::kw_if:
1443 if (Style.isTableGen()) {
1444 // In TableGen it has the form 'if' <value> 'then'.
1445 if (!parseTableGenValue())
1446 return false;
1447 if (CurrentToken && CurrentToken->is(Keywords.kw_then))
1448 next(); // skip then
1449 break;
1451 if (CurrentToken &&
1452 CurrentToken->isOneOf(tok::kw_constexpr, tok::identifier)) {
1453 next();
1455 [[fallthrough]];
1456 case tok::kw_while:
1457 if (CurrentToken && CurrentToken->is(tok::l_paren)) {
1458 next();
1459 if (!parseParens(/*LookForDecls=*/true))
1460 return false;
1462 break;
1463 case tok::kw_for:
1464 if (Style.isJavaScript()) {
1465 // x.for and {for: ...}
1466 if ((Tok->Previous && Tok->Previous->is(tok::period)) ||
1467 (Tok->Next && Tok->Next->is(tok::colon))) {
1468 break;
1470 // JS' for await ( ...
1471 if (CurrentToken && CurrentToken->is(Keywords.kw_await))
1472 next();
1474 if (IsCpp && CurrentToken && CurrentToken->is(tok::kw_co_await))
1475 next();
1476 Contexts.back().ColonIsForRangeExpr = true;
1477 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1478 return false;
1479 next();
1480 if (!parseParens())
1481 return false;
1482 break;
1483 case tok::l_paren:
1484 // When faced with 'operator()()', the kw_operator handler incorrectly
1485 // marks the first l_paren as a OverloadedOperatorLParen. Here, we make
1486 // the first two parens OverloadedOperators and the second l_paren an
1487 // OverloadedOperatorLParen.
1488 if (Tok->Previous && Tok->Previous->is(tok::r_paren) &&
1489 Tok->Previous->MatchingParen &&
1490 Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) {
1491 Tok->Previous->setType(TT_OverloadedOperator);
1492 Tok->Previous->MatchingParen->setType(TT_OverloadedOperator);
1493 Tok->setType(TT_OverloadedOperatorLParen);
1496 if (Style.isVerilog()) {
1497 // Identify the parameter list and port list in a module instantiation.
1498 // This is still needed when we already have
1499 // UnwrappedLineParser::parseVerilogHierarchyHeader because that
1500 // function is only responsible for the definition, not the
1501 // instantiation.
1502 auto IsInstancePort = [&]() {
1503 const FormatToken *Prev = Tok->getPreviousNonComment();
1504 const FormatToken *PrevPrev;
1505 // In the following example all 4 left parentheses will be treated as
1506 // 'TT_VerilogInstancePortLParen'.
1508 // module_x instance_1(port_1); // Case A.
1509 // module_x #(parameter_1) // Case B.
1510 // instance_2(port_1), // Case C.
1511 // instance_3(port_1); // Case D.
1512 if (!Prev || !(PrevPrev = Prev->getPreviousNonComment()))
1513 return false;
1514 // Case A.
1515 if (Keywords.isVerilogIdentifier(*Prev) &&
1516 Keywords.isVerilogIdentifier(*PrevPrev)) {
1517 return true;
1519 // Case B.
1520 if (Prev->is(Keywords.kw_verilogHash) &&
1521 Keywords.isVerilogIdentifier(*PrevPrev)) {
1522 return true;
1524 // Case C.
1525 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::r_paren))
1526 return true;
1527 // Case D.
1528 if (Keywords.isVerilogIdentifier(*Prev) && PrevPrev->is(tok::comma)) {
1529 const FormatToken *PrevParen = PrevPrev->getPreviousNonComment();
1530 if (PrevParen->is(tok::r_paren) && PrevParen->MatchingParen &&
1531 PrevParen->MatchingParen->is(TT_VerilogInstancePortLParen)) {
1532 return true;
1535 return false;
1538 if (IsInstancePort())
1539 Tok->setFinalizedType(TT_VerilogInstancePortLParen);
1542 if (!parseParens())
1543 return false;
1544 if (Line.MustBeDeclaration && Contexts.size() == 1 &&
1545 !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) &&
1546 !Tok->isOneOf(TT_TypeDeclarationParen, TT_RequiresExpressionLParen)) {
1547 if (const auto *Previous = Tok->Previous;
1548 !Previous ||
1549 (!Previous->isAttribute() &&
1550 !Previous->isOneOf(TT_RequiresClause, TT_LeadingJavaAnnotation))) {
1551 Line.MightBeFunctionDecl = true;
1552 Tok->MightBeFunctionDeclParen = true;
1555 break;
1556 case tok::l_square:
1557 if (Style.isTableGen())
1558 Tok->setType(TT_TableGenListOpener);
1559 if (!parseSquare())
1560 return false;
1561 break;
1562 case tok::l_brace:
1563 if (Style.Language == FormatStyle::LK_TextProto) {
1564 FormatToken *Previous = Tok->getPreviousNonComment();
1565 if (Previous && Previous->isNot(TT_DictLiteral))
1566 Previous->setType(TT_SelectorName);
1568 Scopes.push_back(getScopeType(*Tok));
1569 if (!parseBrace())
1570 return false;
1571 break;
1572 case tok::less:
1573 if (parseAngle()) {
1574 Tok->setType(TT_TemplateOpener);
1575 // In TT_Proto, we must distignuish between:
1576 // map<key, value>
1577 // msg < item: data >
1578 // msg: < item: data >
1579 // In TT_TextProto, map<key, value> does not occur.
1580 if (Style.Language == FormatStyle::LK_TextProto ||
1581 (Style.Language == FormatStyle::LK_Proto && Tok->Previous &&
1582 Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) {
1583 Tok->setType(TT_DictLiteral);
1584 FormatToken *Previous = Tok->getPreviousNonComment();
1585 if (Previous && Previous->isNot(TT_DictLiteral))
1586 Previous->setType(TT_SelectorName);
1588 if (Style.isTableGen())
1589 Tok->setType(TT_TemplateOpener);
1590 } else {
1591 Tok->setType(TT_BinaryOperator);
1592 NonTemplateLess.insert(Tok);
1593 CurrentToken = Tok;
1594 next();
1596 break;
1597 case tok::r_paren:
1598 case tok::r_square:
1599 return false;
1600 case tok::r_brace:
1601 // Don't pop scope when encountering unbalanced r_brace.
1602 if (!Scopes.empty())
1603 Scopes.pop_back();
1604 // Lines can start with '}'.
1605 if (Tok->Previous)
1606 return false;
1607 break;
1608 case tok::greater:
1609 if (Style.Language != FormatStyle::LK_TextProto)
1610 Tok->setType(TT_BinaryOperator);
1611 if (Tok->Previous && Tok->Previous->is(TT_TemplateCloser))
1612 Tok->SpacesRequiredBefore = 1;
1613 break;
1614 case tok::kw_operator:
1615 if (Style.isProto())
1616 break;
1617 while (CurrentToken &&
1618 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
1619 if (CurrentToken->isOneOf(tok::star, tok::amp))
1620 CurrentToken->setType(TT_PointerOrReference);
1621 auto Next = CurrentToken->getNextNonComment();
1622 if (!Next)
1623 break;
1624 if (Next->is(tok::less))
1625 next();
1626 else
1627 consumeToken();
1628 if (!CurrentToken)
1629 break;
1630 auto Previous = CurrentToken->getPreviousNonComment();
1631 assert(Previous);
1632 if (CurrentToken->is(tok::comma) && Previous->isNot(tok::kw_operator))
1633 break;
1634 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, tok::comma,
1635 tok::star, tok::arrow, tok::amp, tok::ampamp) ||
1636 // User defined literal.
1637 Previous->TokenText.starts_with("\"\"")) {
1638 Previous->setType(TT_OverloadedOperator);
1639 if (CurrentToken->isOneOf(tok::less, tok::greater))
1640 break;
1643 if (CurrentToken && CurrentToken->is(tok::l_paren))
1644 CurrentToken->setType(TT_OverloadedOperatorLParen);
1645 if (CurrentToken && CurrentToken->Previous->is(TT_BinaryOperator))
1646 CurrentToken->Previous->setType(TT_OverloadedOperator);
1647 break;
1648 case tok::question:
1649 if (Style.isJavaScript() && Tok->Next &&
1650 Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren,
1651 tok::r_brace, tok::r_square)) {
1652 // Question marks before semicolons, colons, etc. indicate optional
1653 // types (fields, parameters), e.g.
1654 // function(x?: string, y?) {...}
1655 // class X { y?; }
1656 Tok->setType(TT_JsTypeOptionalQuestion);
1657 break;
1659 // Declarations cannot be conditional expressions, this can only be part
1660 // of a type declaration.
1661 if (Line.MustBeDeclaration && !Contexts.back().IsExpression &&
1662 Style.isJavaScript()) {
1663 break;
1665 if (Style.isCSharp()) {
1666 // `Type?)`, `Type?>`, `Type? name;` and `Type? name =` can only be
1667 // nullable types.
1669 // `Type?)`, `Type?>`, `Type? name;`
1670 if (Tok->Next &&
1671 (Tok->Next->startsSequence(tok::question, tok::r_paren) ||
1672 Tok->Next->startsSequence(tok::question, tok::greater) ||
1673 Tok->Next->startsSequence(tok::question, tok::identifier,
1674 tok::semi))) {
1675 Tok->setType(TT_CSharpNullable);
1676 break;
1679 // `Type? name =`
1680 if (Tok->Next && Tok->Next->is(tok::identifier) && Tok->Next->Next &&
1681 Tok->Next->Next->is(tok::equal)) {
1682 Tok->setType(TT_CSharpNullable);
1683 break;
1686 // Line.MustBeDeclaration will be true for `Type? name;`.
1687 // But not
1688 // cond ? "A" : "B";
1689 // cond ? id : "B";
1690 // cond ? cond2 ? "A" : "B" : "C";
1691 if (!Contexts.back().IsExpression && Line.MustBeDeclaration &&
1692 (!Tok->Next ||
1693 !Tok->Next->isOneOf(tok::identifier, tok::string_literal) ||
1694 !Tok->Next->Next ||
1695 !Tok->Next->Next->isOneOf(tok::colon, tok::question))) {
1696 Tok->setType(TT_CSharpNullable);
1697 break;
1700 parseConditional();
1701 break;
1702 case tok::kw_template:
1703 parseTemplateDeclaration();
1704 break;
1705 case tok::comma:
1706 switch (Contexts.back().ContextType) {
1707 case Context::CtorInitializer:
1708 Tok->setType(TT_CtorInitializerComma);
1709 break;
1710 case Context::InheritanceList:
1711 Tok->setType(TT_InheritanceComma);
1712 break;
1713 case Context::VerilogInstancePortList:
1714 Tok->setFinalizedType(TT_VerilogInstancePortComma);
1715 break;
1716 default:
1717 if (Style.isVerilog() && Contexts.size() == 1 &&
1718 Line.startsWith(Keywords.kw_assign)) {
1719 Tok->setFinalizedType(TT_VerilogAssignComma);
1720 } else if (Contexts.back().FirstStartOfName &&
1721 (Contexts.size() == 1 || startsWithInitStatement(Line))) {
1722 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
1723 Line.IsMultiVariableDeclStmt = true;
1725 break;
1727 if (Contexts.back().ContextType == Context::ForEachMacro)
1728 Contexts.back().IsExpression = true;
1729 break;
1730 case tok::kw_default:
1731 // Unindent case labels.
1732 if (Style.isVerilog() && Keywords.isVerilogEndOfLabel(*Tok) &&
1733 (Line.Level > 1 || (!Line.InPPDirective && Line.Level > 0))) {
1734 --Line.Level;
1736 break;
1737 case tok::identifier:
1738 if (Tok->isOneOf(Keywords.kw___has_include,
1739 Keywords.kw___has_include_next)) {
1740 parseHasInclude();
1742 if (Style.isCSharp() && Tok->is(Keywords.kw_where) && Tok->Next &&
1743 Tok->Next->isNot(tok::l_paren)) {
1744 Tok->setType(TT_CSharpGenericTypeConstraint);
1745 parseCSharpGenericTypeConstraint();
1746 if (!Tok->getPreviousNonComment())
1747 Line.IsContinuation = true;
1749 if (Style.isTableGen()) {
1750 if (Tok->is(Keywords.kw_assert)) {
1751 if (!parseTableGenValue())
1752 return false;
1753 } else if (Tok->isOneOf(Keywords.kw_def, Keywords.kw_defm) &&
1754 (!Tok->Next ||
1755 !Tok->Next->isOneOf(tok::colon, tok::l_brace))) {
1756 // The case NameValue appears.
1757 if (!parseTableGenValue(true))
1758 return false;
1761 break;
1762 case tok::arrow:
1763 if (Tok->Previous && Tok->Previous->is(tok::kw_noexcept))
1764 Tok->setType(TT_TrailingReturnArrow);
1765 break;
1766 case tok::equal:
1767 // In TableGen, there must be a value after "=";
1768 if (Style.isTableGen() && !parseTableGenValue())
1769 return false;
1770 break;
1771 default:
1772 break;
1774 return true;
1777 void parseCSharpGenericTypeConstraint() {
1778 int OpenAngleBracketsCount = 0;
1779 while (CurrentToken) {
1780 if (CurrentToken->is(tok::less)) {
1781 // parseAngle is too greedy and will consume the whole line.
1782 CurrentToken->setType(TT_TemplateOpener);
1783 ++OpenAngleBracketsCount;
1784 next();
1785 } else if (CurrentToken->is(tok::greater)) {
1786 CurrentToken->setType(TT_TemplateCloser);
1787 --OpenAngleBracketsCount;
1788 next();
1789 } else if (CurrentToken->is(tok::comma) && OpenAngleBracketsCount == 0) {
1790 // We allow line breaks after GenericTypeConstraintComma's
1791 // so do not flag commas in Generics as GenericTypeConstraintComma's.
1792 CurrentToken->setType(TT_CSharpGenericTypeConstraintComma);
1793 next();
1794 } else if (CurrentToken->is(Keywords.kw_where)) {
1795 CurrentToken->setType(TT_CSharpGenericTypeConstraint);
1796 next();
1797 } else if (CurrentToken->is(tok::colon)) {
1798 CurrentToken->setType(TT_CSharpGenericTypeConstraintColon);
1799 next();
1800 } else {
1801 next();
1806 void parseIncludeDirective() {
1807 if (CurrentToken && CurrentToken->is(tok::less)) {
1808 next();
1809 while (CurrentToken) {
1810 // Mark tokens up to the trailing line comments as implicit string
1811 // literals.
1812 if (CurrentToken->isNot(tok::comment) &&
1813 !CurrentToken->TokenText.starts_with("//")) {
1814 CurrentToken->setType(TT_ImplicitStringLiteral);
1816 next();
1821 void parseWarningOrError() {
1822 next();
1823 // We still want to format the whitespace left of the first token of the
1824 // warning or error.
1825 next();
1826 while (CurrentToken) {
1827 CurrentToken->setType(TT_ImplicitStringLiteral);
1828 next();
1832 void parsePragma() {
1833 next(); // Consume "pragma".
1834 if (CurrentToken &&
1835 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option,
1836 Keywords.kw_region)) {
1837 bool IsMarkOrRegion =
1838 CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_region);
1839 next();
1840 next(); // Consume first token (so we fix leading whitespace).
1841 while (CurrentToken) {
1842 if (IsMarkOrRegion || CurrentToken->Previous->is(TT_BinaryOperator))
1843 CurrentToken->setType(TT_ImplicitStringLiteral);
1844 next();
1849 void parseHasInclude() {
1850 if (!CurrentToken || CurrentToken->isNot(tok::l_paren))
1851 return;
1852 next(); // '('
1853 parseIncludeDirective();
1854 next(); // ')'
1857 LineType parsePreprocessorDirective() {
1858 bool IsFirstToken = CurrentToken->IsFirst;
1859 LineType Type = LT_PreprocessorDirective;
1860 next();
1861 if (!CurrentToken)
1862 return Type;
1864 if (Style.isJavaScript() && IsFirstToken) {
1865 // JavaScript files can contain shebang lines of the form:
1866 // #!/usr/bin/env node
1867 // Treat these like C++ #include directives.
1868 while (CurrentToken) {
1869 // Tokens cannot be comments here.
1870 CurrentToken->setType(TT_ImplicitStringLiteral);
1871 next();
1873 return LT_ImportStatement;
1876 if (CurrentToken->is(tok::numeric_constant)) {
1877 CurrentToken->SpacesRequiredBefore = 1;
1878 return Type;
1880 // Hashes in the middle of a line can lead to any strange token
1881 // sequence.
1882 if (!CurrentToken->Tok.getIdentifierInfo())
1883 return Type;
1884 // In Verilog macro expansions start with a backtick just like preprocessor
1885 // directives. Thus we stop if the word is not a preprocessor directive.
1886 if (Style.isVerilog() && !Keywords.isVerilogPPDirective(*CurrentToken))
1887 return LT_Invalid;
1888 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
1889 case tok::pp_include:
1890 case tok::pp_include_next:
1891 case tok::pp_import:
1892 next();
1893 parseIncludeDirective();
1894 Type = LT_ImportStatement;
1895 break;
1896 case tok::pp_error:
1897 case tok::pp_warning:
1898 parseWarningOrError();
1899 break;
1900 case tok::pp_pragma:
1901 parsePragma();
1902 break;
1903 case tok::pp_if:
1904 case tok::pp_elif:
1905 Contexts.back().IsExpression = true;
1906 next();
1907 if (CurrentToken)
1908 CurrentToken->SpacesRequiredBefore = true;
1909 parseLine();
1910 break;
1911 default:
1912 break;
1914 while (CurrentToken) {
1915 FormatToken *Tok = CurrentToken;
1916 next();
1917 if (Tok->is(tok::l_paren)) {
1918 parseParens();
1919 } else if (Tok->isOneOf(Keywords.kw___has_include,
1920 Keywords.kw___has_include_next)) {
1921 parseHasInclude();
1924 return Type;
1927 public:
1928 LineType parseLine() {
1929 if (!CurrentToken)
1930 return LT_Invalid;
1931 NonTemplateLess.clear();
1932 if (!Line.InMacroBody && CurrentToken->is(tok::hash)) {
1933 // We were not yet allowed to use C++17 optional when this was being
1934 // written. So we used LT_Invalid to mark that the line is not a
1935 // preprocessor directive.
1936 auto Type = parsePreprocessorDirective();
1937 if (Type != LT_Invalid)
1938 return Type;
1941 // Directly allow to 'import <string-literal>' to support protocol buffer
1942 // definitions (github.com/google/protobuf) or missing "#" (either way we
1943 // should not break the line).
1944 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
1945 if ((Style.Language == FormatStyle::LK_Java &&
1946 CurrentToken->is(Keywords.kw_package)) ||
1947 (!Style.isVerilog() && Info &&
1948 Info->getPPKeywordID() == tok::pp_import && CurrentToken->Next &&
1949 CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier,
1950 tok::kw_static))) {
1951 next();
1952 parseIncludeDirective();
1953 return LT_ImportStatement;
1956 // If this line starts and ends in '<' and '>', respectively, it is likely
1957 // part of "#define <a/b.h>".
1958 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
1959 parseIncludeDirective();
1960 return LT_ImportStatement;
1963 // In .proto files, top-level options and package statements are very
1964 // similar to import statements and should not be line-wrapped.
1965 if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 &&
1966 CurrentToken->isOneOf(Keywords.kw_option, Keywords.kw_package)) {
1967 next();
1968 if (CurrentToken && CurrentToken->is(tok::identifier)) {
1969 while (CurrentToken)
1970 next();
1971 return LT_ImportStatement;
1975 bool KeywordVirtualFound = false;
1976 bool ImportStatement = false;
1978 // import {...} from '...';
1979 if (Style.isJavaScript() && CurrentToken->is(Keywords.kw_import))
1980 ImportStatement = true;
1982 while (CurrentToken) {
1983 if (CurrentToken->is(tok::kw_virtual))
1984 KeywordVirtualFound = true;
1985 if (Style.isJavaScript()) {
1986 // export {...} from '...';
1987 // An export followed by "from 'some string';" is a re-export from
1988 // another module identified by a URI and is treated as a
1989 // LT_ImportStatement (i.e. prevent wraps on it for long URIs).
1990 // Just "export {...};" or "export class ..." should not be treated as
1991 // an import in this sense.
1992 if (Line.First->is(tok::kw_export) &&
1993 CurrentToken->is(Keywords.kw_from) && CurrentToken->Next &&
1994 CurrentToken->Next->isStringLiteral()) {
1995 ImportStatement = true;
1997 if (isClosureImportStatement(*CurrentToken))
1998 ImportStatement = true;
2000 if (!consumeToken())
2001 return LT_Invalid;
2003 if (KeywordVirtualFound)
2004 return LT_VirtualFunctionDecl;
2005 if (ImportStatement)
2006 return LT_ImportStatement;
2008 if (Line.startsWith(TT_ObjCMethodSpecifier)) {
2009 if (Contexts.back().FirstObjCSelectorName) {
2010 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
2011 Contexts.back().LongestObjCSelectorName;
2013 return LT_ObjCMethodDecl;
2016 for (const auto &ctx : Contexts)
2017 if (ctx.ContextType == Context::StructArrayInitializer)
2018 return LT_ArrayOfStructInitializer;
2020 return LT_Other;
2023 private:
2024 bool isClosureImportStatement(const FormatToken &Tok) {
2025 // FIXME: Closure-library specific stuff should not be hard-coded but be
2026 // configurable.
2027 return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) &&
2028 Tok.Next->Next &&
2029 (Tok.Next->Next->TokenText == "module" ||
2030 Tok.Next->Next->TokenText == "provide" ||
2031 Tok.Next->Next->TokenText == "require" ||
2032 Tok.Next->Next->TokenText == "requireType" ||
2033 Tok.Next->Next->TokenText == "forwardDeclare") &&
2034 Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren);
2037 void resetTokenMetadata() {
2038 if (!CurrentToken)
2039 return;
2041 // Reset token type in case we have already looked at it and then
2042 // recovered from an error (e.g. failure to find the matching >).
2043 if (!CurrentToken->isTypeFinalized() &&
2044 !CurrentToken->isOneOf(
2045 TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro,
2046 TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace,
2047 TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow,
2048 TT_NamespaceMacro, TT_OverloadedOperator, TT_RegexLiteral,
2049 TT_TemplateString, TT_ObjCStringLiteral, TT_UntouchableMacroFunc,
2050 TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro,
2051 TT_ClassLBrace, TT_EnumLBrace, TT_RecordLBrace, TT_StructLBrace,
2052 TT_UnionLBrace, TT_RequiresClause,
2053 TT_RequiresClauseInARequiresExpression, TT_RequiresExpression,
2054 TT_RequiresExpressionLParen, TT_RequiresExpressionLBrace,
2055 TT_BracedListLBrace)) {
2056 CurrentToken->setType(TT_Unknown);
2058 CurrentToken->Role.reset();
2059 CurrentToken->MatchingParen = nullptr;
2060 CurrentToken->FakeLParens.clear();
2061 CurrentToken->FakeRParens = 0;
2064 void next() {
2065 if (!CurrentToken)
2066 return;
2068 CurrentToken->NestingLevel = Contexts.size() - 1;
2069 CurrentToken->BindingStrength = Contexts.back().BindingStrength;
2070 modifyContext(*CurrentToken);
2071 determineTokenType(*CurrentToken);
2072 CurrentToken = CurrentToken->Next;
2074 resetTokenMetadata();
2077 /// A struct to hold information valid in a specific context, e.g.
2078 /// a pair of parenthesis.
2079 struct Context {
2080 Context(tok::TokenKind ContextKind, unsigned BindingStrength,
2081 bool IsExpression)
2082 : ContextKind(ContextKind), BindingStrength(BindingStrength),
2083 IsExpression(IsExpression) {}
2085 tok::TokenKind ContextKind;
2086 unsigned BindingStrength;
2087 bool IsExpression;
2088 unsigned LongestObjCSelectorName = 0;
2089 bool ColonIsForRangeExpr = false;
2090 bool ColonIsDictLiteral = false;
2091 bool ColonIsObjCMethodExpr = false;
2092 FormatToken *FirstObjCSelectorName = nullptr;
2093 FormatToken *FirstStartOfName = nullptr;
2094 bool CanBeExpression = true;
2095 bool CaretFound = false;
2096 bool InCpp11AttributeSpecifier = false;
2097 bool InCSharpAttributeSpecifier = false;
2098 bool VerilogAssignmentFound = false;
2099 // Whether the braces may mean concatenation instead of structure or array
2100 // literal.
2101 bool VerilogMayBeConcatenation = false;
2102 bool IsTableGenDAGArg = false;
2103 bool IsTableGenBangOpe = false;
2104 bool IsTableGenCondOpe = false;
2105 enum {
2106 Unknown,
2107 // Like the part after `:` in a constructor.
2108 // Context(...) : IsExpression(IsExpression)
2109 CtorInitializer,
2110 // Like in the parentheses in a foreach.
2111 ForEachMacro,
2112 // Like the inheritance list in a class declaration.
2113 // class Input : public IO
2114 InheritanceList,
2115 // Like in the braced list.
2116 // int x[] = {};
2117 StructArrayInitializer,
2118 // Like in `static_cast<int>`.
2119 TemplateArgument,
2120 // C11 _Generic selection.
2121 C11GenericSelection,
2122 // Like in the outer parentheses in `ffnand ff1(.q());`.
2123 VerilogInstancePortList,
2124 } ContextType = Unknown;
2127 /// Puts a new \c Context onto the stack \c Contexts for the lifetime
2128 /// of each instance.
2129 struct ScopedContextCreator {
2130 AnnotatingParser &P;
2132 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
2133 unsigned Increase)
2134 : P(P) {
2135 P.Contexts.push_back(Context(ContextKind,
2136 P.Contexts.back().BindingStrength + Increase,
2137 P.Contexts.back().IsExpression));
2140 ~ScopedContextCreator() {
2141 if (P.Style.AlignArrayOfStructures != FormatStyle::AIAS_None) {
2142 if (P.Contexts.back().ContextType == Context::StructArrayInitializer) {
2143 P.Contexts.pop_back();
2144 P.Contexts.back().ContextType = Context::StructArrayInitializer;
2145 return;
2148 P.Contexts.pop_back();
2152 void modifyContext(const FormatToken &Current) {
2153 auto AssignmentStartsExpression = [&]() {
2154 if (Current.getPrecedence() != prec::Assignment)
2155 return false;
2157 if (Line.First->isOneOf(tok::kw_using, tok::kw_return))
2158 return false;
2159 if (Line.First->is(tok::kw_template)) {
2160 assert(Current.Previous);
2161 if (Current.Previous->is(tok::kw_operator)) {
2162 // `template ... operator=` cannot be an expression.
2163 return false;
2166 // `template` keyword can start a variable template.
2167 const FormatToken *Tok = Line.First->getNextNonComment();
2168 assert(Tok); // Current token is on the same line.
2169 if (Tok->isNot(TT_TemplateOpener)) {
2170 // Explicit template instantiations do not have `<>`.
2171 return false;
2174 // This is the default value of a template parameter, determine if it's
2175 // type or non-type.
2176 if (Contexts.back().ContextKind == tok::less) {
2177 assert(Current.Previous->Previous);
2178 return !Current.Previous->Previous->isOneOf(tok::kw_typename,
2179 tok::kw_class);
2182 Tok = Tok->MatchingParen;
2183 if (!Tok)
2184 return false;
2185 Tok = Tok->getNextNonComment();
2186 if (!Tok)
2187 return false;
2189 if (Tok->isOneOf(tok::kw_class, tok::kw_enum, tok::kw_struct,
2190 tok::kw_using)) {
2191 return false;
2194 return true;
2197 // Type aliases use `type X = ...;` in TypeScript and can be exported
2198 // using `export type ...`.
2199 if (Style.isJavaScript() &&
2200 (Line.startsWith(Keywords.kw_type, tok::identifier) ||
2201 Line.startsWith(tok::kw_export, Keywords.kw_type,
2202 tok::identifier))) {
2203 return false;
2206 return !Current.Previous || Current.Previous->isNot(tok::kw_operator);
2209 if (AssignmentStartsExpression()) {
2210 Contexts.back().IsExpression = true;
2211 if (!Line.startsWith(TT_UnaryOperator)) {
2212 for (FormatToken *Previous = Current.Previous;
2213 Previous && Previous->Previous &&
2214 !Previous->Previous->isOneOf(tok::comma, tok::semi);
2215 Previous = Previous->Previous) {
2216 if (Previous->isOneOf(tok::r_square, tok::r_paren, tok::greater)) {
2217 Previous = Previous->MatchingParen;
2218 if (!Previous)
2219 break;
2221 if (Previous->opensScope())
2222 break;
2223 if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) &&
2224 Previous->isPointerOrReference() && Previous->Previous &&
2225 Previous->Previous->isNot(tok::equal)) {
2226 Previous->setType(TT_PointerOrReference);
2230 } else if (Current.is(tok::lessless) &&
2231 (!Current.Previous ||
2232 Current.Previous->isNot(tok::kw_operator))) {
2233 Contexts.back().IsExpression = true;
2234 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
2235 Contexts.back().IsExpression = true;
2236 } else if (Current.is(TT_TrailingReturnArrow)) {
2237 Contexts.back().IsExpression = false;
2238 } else if (Current.is(Keywords.kw_assert)) {
2239 Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java;
2240 } else if (Current.Previous &&
2241 Current.Previous->is(TT_CtorInitializerColon)) {
2242 Contexts.back().IsExpression = true;
2243 Contexts.back().ContextType = Context::CtorInitializer;
2244 } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) {
2245 Contexts.back().ContextType = Context::InheritanceList;
2246 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
2247 for (FormatToken *Previous = Current.Previous;
2248 Previous && Previous->isOneOf(tok::star, tok::amp);
2249 Previous = Previous->Previous) {
2250 Previous->setType(TT_PointerOrReference);
2252 if (Line.MustBeDeclaration &&
2253 Contexts.front().ContextType != Context::CtorInitializer) {
2254 Contexts.back().IsExpression = false;
2256 } else if (Current.is(tok::kw_new)) {
2257 Contexts.back().CanBeExpression = false;
2258 } else if (Current.is(tok::semi) ||
2259 (Current.is(tok::exclaim) && Current.Previous &&
2260 Current.Previous->isNot(tok::kw_operator))) {
2261 // This should be the condition or increment in a for-loop.
2262 // But not operator !() (can't use TT_OverloadedOperator here as its not
2263 // been annotated yet).
2264 Contexts.back().IsExpression = true;
2268 static FormatToken *untilMatchingParen(FormatToken *Current) {
2269 // Used when `MatchingParen` is not yet established.
2270 int ParenLevel = 0;
2271 while (Current) {
2272 if (Current->is(tok::l_paren))
2273 ++ParenLevel;
2274 if (Current->is(tok::r_paren))
2275 --ParenLevel;
2276 if (ParenLevel < 1)
2277 break;
2278 Current = Current->Next;
2280 return Current;
2283 static bool isDeductionGuide(FormatToken &Current) {
2284 // Look for a deduction guide template<T> A(...) -> A<...>;
2285 if (Current.Previous && Current.Previous->is(tok::r_paren) &&
2286 Current.startsSequence(tok::arrow, tok::identifier, tok::less)) {
2287 // Find the TemplateCloser.
2288 FormatToken *TemplateCloser = Current.Next->Next;
2289 int NestingLevel = 0;
2290 while (TemplateCloser) {
2291 // Skip over an expressions in parens A<(3 < 2)>;
2292 if (TemplateCloser->is(tok::l_paren)) {
2293 // No Matching Paren yet so skip to matching paren
2294 TemplateCloser = untilMatchingParen(TemplateCloser);
2295 if (!TemplateCloser)
2296 break;
2298 if (TemplateCloser->is(tok::less))
2299 ++NestingLevel;
2300 if (TemplateCloser->is(tok::greater))
2301 --NestingLevel;
2302 if (NestingLevel < 1)
2303 break;
2304 TemplateCloser = TemplateCloser->Next;
2306 // Assuming we have found the end of the template ensure its followed
2307 // with a semi-colon.
2308 if (TemplateCloser && TemplateCloser->Next &&
2309 TemplateCloser->Next->is(tok::semi) &&
2310 Current.Previous->MatchingParen) {
2311 // Determine if the identifier `A` prior to the A<..>; is the same as
2312 // prior to the A(..)
2313 FormatToken *LeadingIdentifier =
2314 Current.Previous->MatchingParen->Previous;
2316 return LeadingIdentifier &&
2317 LeadingIdentifier->TokenText == Current.Next->TokenText;
2320 return false;
2323 void determineTokenType(FormatToken &Current) {
2324 if (Current.isNot(TT_Unknown)) {
2325 // The token type is already known.
2326 return;
2329 if ((Style.isJavaScript() || Style.isCSharp()) &&
2330 Current.is(tok::exclaim)) {
2331 if (Current.Previous) {
2332 bool IsIdentifier =
2333 Style.isJavaScript()
2334 ? Keywords.IsJavaScriptIdentifier(
2335 *Current.Previous, /* AcceptIdentifierName= */ true)
2336 : Current.Previous->is(tok::identifier);
2337 if (IsIdentifier ||
2338 Current.Previous->isOneOf(
2339 tok::kw_default, tok::kw_namespace, tok::r_paren, tok::r_square,
2340 tok::r_brace, tok::kw_false, tok::kw_true, Keywords.kw_type,
2341 Keywords.kw_get, Keywords.kw_init, Keywords.kw_set) ||
2342 Current.Previous->Tok.isLiteral()) {
2343 Current.setType(TT_NonNullAssertion);
2344 return;
2347 if (Current.Next &&
2348 Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) {
2349 Current.setType(TT_NonNullAssertion);
2350 return;
2354 // Line.MightBeFunctionDecl can only be true after the parentheses of a
2355 // function declaration have been found. In this case, 'Current' is a
2356 // trailing token of this declaration and thus cannot be a name.
2357 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
2358 Current.is(Keywords.kw_instanceof)) {
2359 Current.setType(TT_BinaryOperator);
2360 } else if (isStartOfName(Current) &&
2361 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
2362 Contexts.back().FirstStartOfName = &Current;
2363 Current.setType(TT_StartOfName);
2364 } else if (Current.is(tok::semi)) {
2365 // Reset FirstStartOfName after finding a semicolon so that a for loop
2366 // with multiple increment statements is not confused with a for loop
2367 // having multiple variable declarations.
2368 Contexts.back().FirstStartOfName = nullptr;
2369 } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) {
2370 AutoFound = true;
2371 } else if (Current.is(tok::arrow) &&
2372 Style.Language == FormatStyle::LK_Java) {
2373 Current.setType(TT_TrailingReturnArrow);
2374 } else if (Current.is(tok::arrow) && Style.isVerilog()) {
2375 // The implication operator.
2376 Current.setType(TT_BinaryOperator);
2377 } else if (Current.is(tok::arrow) && AutoFound &&
2378 Line.MightBeFunctionDecl && Current.NestingLevel == 0 &&
2379 !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) {
2380 // not auto operator->() -> xxx;
2381 Current.setType(TT_TrailingReturnArrow);
2382 } else if (Current.is(tok::arrow) && Current.Previous &&
2383 Current.Previous->is(tok::r_brace)) {
2384 // Concept implicit conversion constraint needs to be treated like
2385 // a trailing return type ... } -> <type>.
2386 Current.setType(TT_TrailingReturnArrow);
2387 } else if (isDeductionGuide(Current)) {
2388 // Deduction guides trailing arrow " A(...) -> A<T>;".
2389 Current.setType(TT_TrailingReturnArrow);
2390 } else if (Current.isPointerOrReference()) {
2391 Current.setType(determineStarAmpUsage(
2392 Current,
2393 Contexts.back().CanBeExpression && Contexts.back().IsExpression,
2394 Contexts.back().ContextType == Context::TemplateArgument));
2395 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret) ||
2396 (Style.isVerilog() && Current.is(tok::pipe))) {
2397 Current.setType(determinePlusMinusCaretUsage(Current));
2398 if (Current.is(TT_UnaryOperator) && Current.is(tok::caret))
2399 Contexts.back().CaretFound = true;
2400 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
2401 Current.setType(determineIncrementUsage(Current));
2402 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
2403 Current.setType(TT_UnaryOperator);
2404 } else if (Current.is(tok::question)) {
2405 if (Style.isJavaScript() && Line.MustBeDeclaration &&
2406 !Contexts.back().IsExpression) {
2407 // In JavaScript, `interface X { foo?(): bar; }` is an optional method
2408 // on the interface, not a ternary expression.
2409 Current.setType(TT_JsTypeOptionalQuestion);
2410 } else if (Style.isTableGen()) {
2411 // In TableGen, '?' is just an identifier like token.
2412 Current.setType(TT_Unknown);
2413 } else {
2414 Current.setType(TT_ConditionalExpr);
2416 } else if (Current.isBinaryOperator() &&
2417 (!Current.Previous || Current.Previous->isNot(tok::l_square)) &&
2418 (Current.isNot(tok::greater) &&
2419 Style.Language != FormatStyle::LK_TextProto)) {
2420 if (Style.isVerilog()) {
2421 if (Current.is(tok::lessequal) && Contexts.size() == 1 &&
2422 !Contexts.back().VerilogAssignmentFound) {
2423 // In Verilog `<=` is assignment if in its own statement. It is a
2424 // statement instead of an expression, that is it can not be chained.
2425 Current.ForcedPrecedence = prec::Assignment;
2426 Current.setFinalizedType(TT_BinaryOperator);
2428 if (Current.getPrecedence() == prec::Assignment)
2429 Contexts.back().VerilogAssignmentFound = true;
2431 Current.setType(TT_BinaryOperator);
2432 } else if (Current.is(tok::comment)) {
2433 if (Current.TokenText.starts_with("/*")) {
2434 if (Current.TokenText.ends_with("*/")) {
2435 Current.setType(TT_BlockComment);
2436 } else {
2437 // The lexer has for some reason determined a comment here. But we
2438 // cannot really handle it, if it isn't properly terminated.
2439 Current.Tok.setKind(tok::unknown);
2441 } else {
2442 Current.setType(TT_LineComment);
2444 } else if (Current.is(tok::string_literal)) {
2445 if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
2446 Current.getPreviousNonComment() &&
2447 Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
2448 Current.getNextNonComment() &&
2449 Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
2450 Current.setType(TT_StringInConcatenation);
2452 } else if (Current.is(tok::l_paren)) {
2453 if (lParenStartsCppCast(Current))
2454 Current.setType(TT_CppCastLParen);
2455 } else if (Current.is(tok::r_paren)) {
2456 if (rParenEndsCast(Current))
2457 Current.setType(TT_CastRParen);
2458 if (Current.MatchingParen && Current.Next &&
2459 !Current.Next->isBinaryOperator() &&
2460 !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace,
2461 tok::comma, tok::period, tok::arrow,
2462 tok::coloncolon, tok::kw_noexcept)) {
2463 if (FormatToken *AfterParen = Current.MatchingParen->Next;
2464 AfterParen && AfterParen->isNot(tok::caret)) {
2465 // Make sure this isn't the return type of an Obj-C block declaration.
2466 if (FormatToken *BeforeParen = Current.MatchingParen->Previous;
2467 BeforeParen && BeforeParen->is(tok::identifier) &&
2468 BeforeParen->isNot(TT_TypenameMacro) &&
2469 BeforeParen->TokenText == BeforeParen->TokenText.upper() &&
2470 (!BeforeParen->Previous ||
2471 BeforeParen->Previous->ClosesTemplateDeclaration ||
2472 BeforeParen->Previous->ClosesRequiresClause)) {
2473 Current.setType(TT_FunctionAnnotationRParen);
2477 } else if (Current.is(tok::at) && Current.Next && !Style.isJavaScript() &&
2478 Style.Language != FormatStyle::LK_Java) {
2479 // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it
2480 // marks declarations and properties that need special formatting.
2481 switch (Current.Next->Tok.getObjCKeywordID()) {
2482 case tok::objc_interface:
2483 case tok::objc_implementation:
2484 case tok::objc_protocol:
2485 Current.setType(TT_ObjCDecl);
2486 break;
2487 case tok::objc_property:
2488 Current.setType(TT_ObjCProperty);
2489 break;
2490 default:
2491 break;
2493 } else if (Current.is(tok::period)) {
2494 FormatToken *PreviousNoComment = Current.getPreviousNonComment();
2495 if (PreviousNoComment &&
2496 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) {
2497 Current.setType(TT_DesignatedInitializerPeriod);
2498 } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
2499 Current.Previous->isOneOf(TT_JavaAnnotation,
2500 TT_LeadingJavaAnnotation)) {
2501 Current.setType(Current.Previous->getType());
2503 } else if (canBeObjCSelectorComponent(Current) &&
2504 // FIXME(bug 36976): ObjC return types shouldn't use
2505 // TT_CastRParen.
2506 Current.Previous && Current.Previous->is(TT_CastRParen) &&
2507 Current.Previous->MatchingParen &&
2508 Current.Previous->MatchingParen->Previous &&
2509 Current.Previous->MatchingParen->Previous->is(
2510 TT_ObjCMethodSpecifier)) {
2511 // This is the first part of an Objective-C selector name. (If there's no
2512 // colon after this, this is the only place which annotates the identifier
2513 // as a selector.)
2514 Current.setType(TT_SelectorName);
2515 } else if (Current.isOneOf(tok::identifier, tok::kw_const, tok::kw_noexcept,
2516 tok::kw_requires) &&
2517 Current.Previous &&
2518 !Current.Previous->isOneOf(tok::equal, tok::at,
2519 TT_CtorInitializerComma,
2520 TT_CtorInitializerColon) &&
2521 Line.MightBeFunctionDecl && Contexts.size() == 1) {
2522 // Line.MightBeFunctionDecl can only be true after the parentheses of a
2523 // function declaration have been found.
2524 Current.setType(TT_TrailingAnnotation);
2525 } else if ((Style.Language == FormatStyle::LK_Java ||
2526 Style.isJavaScript()) &&
2527 Current.Previous) {
2528 if (Current.Previous->is(tok::at) &&
2529 Current.isNot(Keywords.kw_interface)) {
2530 const FormatToken &AtToken = *Current.Previous;
2531 const FormatToken *Previous = AtToken.getPreviousNonComment();
2532 if (!Previous || Previous->is(TT_LeadingJavaAnnotation))
2533 Current.setType(TT_LeadingJavaAnnotation);
2534 else
2535 Current.setType(TT_JavaAnnotation);
2536 } else if (Current.Previous->is(tok::period) &&
2537 Current.Previous->isOneOf(TT_JavaAnnotation,
2538 TT_LeadingJavaAnnotation)) {
2539 Current.setType(Current.Previous->getType());
2544 /// Take a guess at whether \p Tok starts a name of a function or
2545 /// variable declaration.
2547 /// This is a heuristic based on whether \p Tok is an identifier following
2548 /// something that is likely a type.
2549 bool isStartOfName(const FormatToken &Tok) {
2550 // Handled in ExpressionParser for Verilog.
2551 if (Style.isVerilog())
2552 return false;
2554 if (Tok.isNot(tok::identifier) || !Tok.Previous)
2555 return false;
2557 if (const auto *NextNonComment = Tok.getNextNonComment();
2558 (!NextNonComment && !Line.InMacroBody) ||
2559 (NextNonComment &&
2560 (NextNonComment->isPointerOrReference() ||
2561 NextNonComment->is(tok::string_literal) ||
2562 (Line.InPragmaDirective && NextNonComment->is(tok::identifier))))) {
2563 return false;
2566 if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof,
2567 Keywords.kw_as)) {
2568 return false;
2570 if (Style.isJavaScript() && Tok.Previous->is(Keywords.kw_in))
2571 return false;
2573 // Skip "const" as it does not have an influence on whether this is a name.
2574 FormatToken *PreviousNotConst = Tok.getPreviousNonComment();
2576 // For javascript const can be like "let" or "var"
2577 if (!Style.isJavaScript())
2578 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
2579 PreviousNotConst = PreviousNotConst->getPreviousNonComment();
2581 if (!PreviousNotConst)
2582 return false;
2584 if (PreviousNotConst->ClosesRequiresClause)
2585 return false;
2587 if (Style.isTableGen()) {
2588 // keywords such as let and def* defines names.
2589 if (Keywords.isTableGenDefinition(*PreviousNotConst))
2590 return true;
2591 // Otherwise C++ style declarations is available only inside the brace.
2592 if (Contexts.back().ContextKind != tok::l_brace)
2593 return false;
2596 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
2597 PreviousNotConst->Previous &&
2598 PreviousNotConst->Previous->is(tok::hash);
2600 if (PreviousNotConst->is(TT_TemplateCloser)) {
2601 return PreviousNotConst && PreviousNotConst->MatchingParen &&
2602 PreviousNotConst->MatchingParen->Previous &&
2603 PreviousNotConst->MatchingParen->Previous->isNot(tok::period) &&
2604 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
2607 if ((PreviousNotConst->is(tok::r_paren) &&
2608 PreviousNotConst->is(TT_TypeDeclarationParen)) ||
2609 PreviousNotConst->is(TT_AttributeRParen)) {
2610 return true;
2613 // If is a preprocess keyword like #define.
2614 if (IsPPKeyword)
2615 return false;
2617 // int a or auto a.
2618 if (PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto))
2619 return true;
2621 // *a or &a or &&a.
2622 if (PreviousNotConst->is(TT_PointerOrReference))
2623 return true;
2625 // MyClass a;
2626 if (PreviousNotConst->isTypeName(IsCpp))
2627 return true;
2629 // type[] a in Java
2630 if (Style.Language == FormatStyle::LK_Java &&
2631 PreviousNotConst->is(tok::r_square)) {
2632 return true;
2635 // const a = in JavaScript.
2636 return Style.isJavaScript() && PreviousNotConst->is(tok::kw_const);
2639 /// Determine whether '(' is starting a C++ cast.
2640 bool lParenStartsCppCast(const FormatToken &Tok) {
2641 // C-style casts are only used in C++.
2642 if (!IsCpp)
2643 return false;
2645 FormatToken *LeftOfParens = Tok.getPreviousNonComment();
2646 if (LeftOfParens && LeftOfParens->is(TT_TemplateCloser) &&
2647 LeftOfParens->MatchingParen) {
2648 auto *Prev = LeftOfParens->MatchingParen->getPreviousNonComment();
2649 if (Prev &&
2650 Prev->isOneOf(tok::kw_const_cast, tok::kw_dynamic_cast,
2651 tok::kw_reinterpret_cast, tok::kw_static_cast)) {
2652 // FIXME: Maybe we should handle identifiers ending with "_cast",
2653 // e.g. any_cast?
2654 return true;
2657 return false;
2660 /// Determine whether ')' is ending a cast.
///
/// The decision is purely heuristic: it looks at the token left of the
/// matching '(', at the tokens inside the parentheses and at up to two tokens
/// following the ')'. The order of the checks matters, earlier checks take
/// priority over later ones.
///
/// \param Tok The ')' token under consideration.
/// \returns \c true if the parentheses closed by \p Tok are classified as a
/// cast, \c false otherwise.
2661 bool rParenEndsCast(const FormatToken &Tok) {
2662 // C-style casts are only used in C++, C# and Java.
2663 if (!Style.isCSharp() && !IsCpp && Style.Language != FormatStyle::LK_Java)
2664 return false;
2666 // Empty parens aren't casts and there are no casts at the end of the line.
2667 if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen)
2668 return false;
// The parameter list of an overloaded operator is never a cast.
2670 if (Tok.MatchingParen->is(TT_OverloadedOperatorLParen))
2671 return false;
2673 FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
2674 if (LeftOfParens) {
2675 // If there is a closing parenthesis left of the current
2676 // parentheses, look past it as these might be chained casts.
2677 if (LeftOfParens->is(tok::r_paren) &&
2678 LeftOfParens->isNot(TT_CastRParen)) {
2679 if (!LeftOfParens->MatchingParen ||
2680 !LeftOfParens->MatchingParen->Previous) {
2681 return false;
2683 LeftOfParens = LeftOfParens->MatchingParen->Previous;
2686 if (LeftOfParens->is(tok::r_square)) {
2687 // delete[] (void *)ptr;
// The helper walks back over `]`, `[` and `delete` and yields the `delete`
// token, or nullptr if the sequence does not match.
2688 auto MayBeArrayDelete = [](FormatToken *Tok) -> FormatToken * {
2689 if (Tok->isNot(tok::r_square))
2690 return nullptr;
2692 Tok = Tok->getPreviousNonComment();
2693 if (!Tok || Tok->isNot(tok::l_square))
2694 return nullptr;
2696 Tok = Tok->getPreviousNonComment();
2697 if (!Tok || Tok->isNot(tok::kw_delete))
2698 return nullptr;
2699 return Tok;
2701 if (FormatToken *MaybeDelete = MayBeArrayDelete(LeftOfParens))
2702 LeftOfParens = MaybeDelete;
2705 // The Condition directly below this one will see the operator arguments
2706 // as a (void *foo) cast.
2707 // void operator delete(void *foo) ATTRIB;
2708 if (LeftOfParens->Tok.getIdentifierInfo() && LeftOfParens->Previous &&
2709 LeftOfParens->Previous->is(tok::kw_operator)) {
2710 return false;
2713 // If there is an identifier (or with a few exceptions a keyword) right
2714 // before the parentheses, this is unlikely to be a cast.
2715 if (LeftOfParens->Tok.getIdentifierInfo() &&
2716 !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case,
2717 tok::kw_delete, tok::kw_throw)) {
2718 return false;
2721 // Certain other tokens right before the parentheses are also signals that
2722 // this cannot be a cast.
2723 if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator,
2724 TT_TemplateCloser, tok::ellipsis)) {
2725 return false;
// A ')' that is followed by '?' or '&&' is not treated as the end of a cast.
2729 if (Tok.Next->isOneOf(tok::question, tok::ampamp))
2730 return false;
2732 // `foreach((A a, B b) in someList)` should not be seen as a cast.
2733 if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp())
2734 return false;
2736 // Functions which end with decorations like volatile, noexcept are unlikely
2737 // to be casts.
2738 if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const,
2739 tok::kw_requires, tok::kw_throw, tok::arrow,
2740 Keywords.kw_override, Keywords.kw_final) ||
2741 isCppAttribute(IsCpp, *Tok.Next)) {
2742 return false;
2745 // As Java has no function types, a "(" after the ")" likely means that this
2746 // is a cast.
2747 if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren))
2748 return true;
2750 // If a (non-string) literal follows, this is likely a cast.
2751 if (Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof) ||
2752 (Tok.Next->Tok.isLiteral() && Tok.Next->isNot(tok::string_literal))) {
2753 return true;
2756 // Heuristically try to determine whether the parentheses contain a type.
2757 auto IsQualifiedPointerOrReference = [](FormatToken *T, bool IsCpp) {
2758 // This is used to handle cases such as x = (foo *const)&y;
2759 assert(!T->isTypeName(IsCpp) && "Should have already been checked");
2760 // Strip trailing qualifiers such as const or volatile when checking
2761 // whether the parens could be a cast to a pointer/reference type.
2762 while (T) {
2763 if (T->is(TT_AttributeRParen)) {
2764 // Handle `x = (foo *__attribute__((foo)))&v;`:
2765 assert(T->is(tok::r_paren));
2766 assert(T->MatchingParen);
2767 assert(T->MatchingParen->is(tok::l_paren));
2768 assert(T->MatchingParen->is(TT_AttributeLParen));
2769 if (const auto *Tok = T->MatchingParen->Previous;
2770 Tok && Tok->isAttribute()) {
2771 T = Tok->Previous;
2772 continue;
2774 } else if (T->is(TT_AttributeSquare)) {
2775 // Handle `x = (foo *[[clang::foo]])&v;`:
2776 if (T->MatchingParen && T->MatchingParen->Previous) {
2777 T = T->MatchingParen->Previous;
2778 continue;
2780 } else if (T->canBePointerOrReferenceQualifier()) {
2781 T = T->Previous;
2782 continue;
2784 break;
2786 return T && T->is(TT_PointerOrReference);
// The parentheses are considered to hold a type if the last token inside
// them closes a template argument list or a type declaration paren, is a
// type name, or is a (possibly qualified) pointer/reference token.
2788 bool ParensAreType =
2789 !Tok.Previous ||
2790 Tok.Previous->isOneOf(TT_TemplateCloser, TT_TypeDeclarationParen) ||
2791 Tok.Previous->isTypeName(IsCpp) ||
2792 IsQualifiedPointerOrReference(Tok.Previous, IsCpp);
2793 bool ParensCouldEndDecl =
2794 Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater);
2795 if (ParensAreType && !ParensCouldEndDecl)
2796 return true;
2798 // At this point, we heuristically assume that there are no casts at the
2799 // start of the line. We assume that we have found most cases where there
2800 // are by the logic above, e.g. "(void)x;".
2801 if (!LeftOfParens)
2802 return false;
2804 // Certain token types inside the parentheses mean that this can't be a
2805 // cast.
2806 for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok;
2807 Token = Token->Next) {
2808 if (Token->is(TT_BinaryOperator))
2809 return false;
2812 // If the following token is an identifier or 'this', this is a cast. All
2813 // cases where this can be something else are handled above.
2814 if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
2815 return true;
2817 // Look for a cast `( x ) (`.
2818 if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
2819 if (Tok.Previous->is(tok::identifier) &&
2820 Tok.Previous->Previous->is(tok::l_paren)) {
2821 return true;
// The remaining checks inspect the token two positions after the ')'.
2825 if (!Tok.Next->Next)
2826 return false;
2828 // If the next token after the parenthesis is a unary operator, assume
2829 // that this is cast, unless there are unexpected tokens inside the
2830 // parenthesis.
2831 const bool NextIsAmpOrStar = Tok.Next->isOneOf(tok::amp, tok::star);
2832 if (!(Tok.Next->isUnaryOperator() || NextIsAmpOrStar) ||
2833 Tok.Next->is(tok::plus) ||
2834 !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) {
2835 return false;
// `(x) * 2` and `(x) & 2` are not casts, and neither is any `*`/`&` that
// follows the ')' inside a preprocessor directive.
2837 if (NextIsAmpOrStar &&
2838 (Tok.Next->Next->is(tok::numeric_constant) || Line.InPPDirective)) {
2839 return false;
// Inside a preprocessor directive a '-' after the ')' is not taken as the
// start of a cast operand either.
2841 if (Line.InPPDirective && Tok.Next->is(tok::minus))
2842 return false;
2843 // Search for unexpected tokens.
2844 for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen;
2845 Prev = Prev->Previous) {
2846 if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon))
2847 return false;
2849 return true;
2852 /// Returns true if the token is used as a unary operator.
2853 bool determineUnaryOperatorByUsage(const FormatToken &Tok) {
2854 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2855 if (!PrevToken)
2856 return true;
2858 // These keywords are deliberately not included here because they may
2859 // precede only one of unary star/amp and plus/minus but not both. They are
2860 // either included in determineStarAmpUsage or determinePlusMinusCaretUsage.
2862 // @ - It may be followed by a unary `-` in Objective-C literals. We don't
2863 // know how they can be followed by a star or amp.
2864 if (PrevToken->isOneOf(
2865 TT_ConditionalExpr, tok::l_paren, tok::comma, tok::colon, tok::semi,
2866 tok::equal, tok::question, tok::l_square, tok::l_brace,
2867 tok::kw_case, tok::kw_co_await, tok::kw_co_return, tok::kw_co_yield,
2868 tok::kw_delete, tok::kw_return, tok::kw_throw)) {
2869 return true;
2872 // We put sizeof here instead of only in determineStarAmpUsage. In the cases
2873 // where the unary `+` operator is overloaded, it is reasonable to write
2874 // things like `sizeof +x`. Like commit 446d6ec996c6c3.
2875 if (PrevToken->is(tok::kw_sizeof))
2876 return true;
2878 // A sequence of leading unary operators.
2879 if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator))
2880 return true;
2882 // There can't be two consecutive binary operators.
2883 if (PrevToken->is(TT_BinaryOperator))
2884 return true;
2886 return false;
2889 /// Return the type of the given token assuming it is * or &.
///
/// `&&` tokens are handled here as well (see the tok::ampamp checks below).
/// The checks are ordered heuristics; the first one that matches decides.
///
/// \param Tok The `*`, `&` or `&&` token to classify.
/// \param IsExpression Whether the caller considers the token to be part of
/// an expression; this biases ambiguous cases towards a binary operator.
/// \param InTemplateArgument Whether the token is inside a template argument.
/// \returns One of \c TT_UnaryOperator, \c TT_BinaryOperator or
/// \c TT_PointerOrReference.
2890 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
2891 bool InTemplateArgument) {
2892 if (Style.isJavaScript())
2893 return TT_BinaryOperator;
2895 // && in C# must be a binary operator.
2896 if (Style.isCSharp() && Tok.is(tok::ampamp))
2897 return TT_BinaryOperator;
2899 if (Style.isVerilog()) {
2900 // In Verilog, `*` can only be a binary operator. `&` can be either unary
2901 // or binary. `*` also includes `*>` in module path declarations in
2902 // specify blocks because merged tokens take the type of the first one by
2903 // default.
2904 if (Tok.is(tok::star))
2905 return TT_BinaryOperator;
2906 return determineUnaryOperatorByUsage(Tok) ? TT_UnaryOperator
2907 : TT_BinaryOperator;
2910 const FormatToken *PrevToken = Tok.getPreviousNonComment();
2911 if (!PrevToken)
2912 return TT_UnaryOperator;
2913 if (PrevToken->is(TT_TypeName))
2914 return TT_PointerOrReference;
// `new && x` / `delete && x`: an `&&` directly after new/delete is treated as
// a binary operator.
2915 if (PrevToken->isOneOf(tok::kw_new, tok::kw_delete) && Tok.is(tok::ampamp))
2916 return TT_BinaryOperator;
2918 const FormatToken *NextToken = Tok.getNextNonComment();
2920 if (InTemplateArgument && NextToken && NextToken->is(tok::kw_noexcept))
2921 return TT_BinaryOperator;
// Nothing, or a token that cannot start an operand, follows: the token is
// the trailing part of a type.
2923 if (!NextToken ||
2924 NextToken->isOneOf(tok::arrow, tok::equal, tok::comma, tok::r_paren,
2925 TT_RequiresClause) ||
2926 (NextToken->is(tok::kw_noexcept) && !IsExpression) ||
2927 NextToken->canBePointerOrReferenceQualifier() ||
2928 (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) {
2929 return TT_PointerOrReference;
2932 if (PrevToken->is(tok::coloncolon))
2933 return TT_PointerOrReference;
2935 if (PrevToken->is(tok::r_paren) && PrevToken->is(TT_TypeDeclarationParen))
2936 return TT_PointerOrReference;
2938 if (determineUnaryOperatorByUsage(Tok))
2939 return TT_UnaryOperator;
2941 if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare))
2942 return TT_PointerOrReference;
2943 if (NextToken->is(tok::kw_operator) && !IsExpression)
2944 return TT_PointerOrReference;
2945 if (NextToken->isOneOf(tok::comma, tok::semi))
2946 return TT_PointerOrReference;
2948 // After right braces, star tokens are likely to be pointers to struct,
2949 // union, or class.
2950 // struct {} *ptr;
2951 // This by itself is not sufficient to distinguish from multiplication
2952 // following a brace-initialized expression, as in:
2953 // int i = int{42} * 2;
2954 // In the struct case, the part of the struct declaration until the `{` and
2955 // the `}` are put on separate unwrapped lines; in the brace-initialized
2956 // case, the matching `{` is on the same unwrapped line, so check for the
2957 // presence of the matching brace to distinguish between those.
2958 if (PrevToken->is(tok::r_brace) && Tok.is(tok::star) &&
2959 !PrevToken->MatchingParen) {
2960 return TT_PointerOrReference;
// `delete[] *p` / `delete[] &x`: the operator right after `delete[]` is unary.
2963 if (PrevToken->endsSequence(tok::r_square, tok::l_square, tok::kw_delete))
2964 return TT_UnaryOperator;
// A literal or a closing bracket on the left is a complete left operand.
2966 if (PrevToken->Tok.isLiteral() ||
2967 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
2968 tok::kw_false, tok::r_brace)) {
2969 return TT_BinaryOperator;
// Look through any opening parentheses on the right; a literal, true/false
// or a unary operator there is a right operand.
2972 const FormatToken *NextNonParen = NextToken;
2973 while (NextNonParen && NextNonParen->is(tok::l_paren))
2974 NextNonParen = NextNonParen->getNextNonComment();
2975 if (NextNonParen && (NextNonParen->Tok.isLiteral() ||
2976 NextNonParen->isOneOf(tok::kw_true, tok::kw_false) ||
2977 NextNonParen->isUnaryOperator())) {
2978 return TT_BinaryOperator;
2981 // If we know we're in a template argument, there are no named declarations.
2982 // Thus, having an identifier on the right-hand side indicates a binary
2983 // operator.
2984 if (InTemplateArgument && NextToken->Tok.isAnyIdentifier())
2985 return TT_BinaryOperator;
2987 // "&&" followed by "(", "*", or "&" is quite unlikely to be two successive
2988 // unary "&".
2989 if (Tok.is(tok::ampamp) &&
2990 NextToken->isOneOf(tok::l_paren, tok::star, tok::amp)) {
2991 return TT_BinaryOperator;
2994 // This catches some cases where evaluation order is used as control flow:
2995 // aaa && aaa->f();
2996 if (NextToken->Tok.isAnyIdentifier()) {
2997 const FormatToken *NextNextToken = NextToken->getNextNonComment();
2998 if (NextNextToken && NextNextToken->is(tok::arrow))
2999 return TT_BinaryOperator;
3002 // It is very unlikely that we are going to find a pointer or reference type
3003 // definition on the RHS of an assignment.
3004 if (IsExpression && !Contexts.back().CaretFound)
3005 return TT_BinaryOperator;
3007 // Operators at class scope are likely pointer or reference members.
3008 if (!Scopes.empty() && Scopes.back() == ST_Class)
3009 return TT_PointerOrReference;
3011 // Tokens that indicate member access or chained operator& use.
// Note that a null token also counts as a match.
3012 auto IsChainedOperatorAmpOrMember = [](const FormatToken *token) {
3013 return !token || token->isOneOf(tok::amp, tok::period, tok::arrow,
3014 tok::arrowstar, tok::periodstar);
3017 // It's more likely that & represents operator& than an uninitialized
3018 // reference.
3019 if (Tok.is(tok::amp) && PrevToken && PrevToken->Tok.isAnyIdentifier() &&
3020 IsChainedOperatorAmpOrMember(PrevToken->getPreviousNonComment()) &&
3021 NextToken && NextToken->Tok.isAnyIdentifier()) {
3022 if (auto NextNext = NextToken->getNextNonComment();
3023 NextNext &&
3024 (IsChainedOperatorAmpOrMember(NextNext) || NextNext->is(tok::semi))) {
3025 return TT_BinaryOperator;
// No heuristic matched: fall back to a pointer or reference.
3029 return TT_PointerOrReference;
3032 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
3033 if (determineUnaryOperatorByUsage(Tok))
3034 return TT_UnaryOperator;
3036 const FormatToken *PrevToken = Tok.getPreviousNonComment();
3037 if (!PrevToken)
3038 return TT_UnaryOperator;
3040 if (PrevToken->is(tok::at))
3041 return TT_UnaryOperator;
3043 // Fall back to marking the token as binary operator.
3044 return TT_BinaryOperator;
3047 /// Determine whether ++/-- are pre- or post-increments/-decrements.
3048 TokenType determineIncrementUsage(const FormatToken &Tok) {
3049 const FormatToken *PrevToken = Tok.getPreviousNonComment();
3050 if (!PrevToken || PrevToken->is(TT_CastRParen))
3051 return TT_UnaryOperator;
3052 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
3053 return TT_TrailingUnaryOperator;
3055 return TT_UnaryOperator;
3058 SmallVector<Context, 8> Contexts;
3060 const FormatStyle &Style;
3061 AnnotatedLine &Line;
3062 FormatToken *CurrentToken;
3063 bool AutoFound;
3064 bool IsCpp;
3065 const AdditionalKeywords &Keywords;
3067 SmallVector<ScopeType> &Scopes;
3069 // Set of "<" tokens that do not open a template parameter list. If parseAngle
3070 // determines that a specific token can't be a template opener, it will make
3071 // same decision irrespective of the decisions for tokens leading up to it.
3072 // Store this information to prevent this from causing exponential runtime.
3073 llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess;
3076 static const int PrecedenceUnaryOperator = prec::PointerToMember + 1;
3077 static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
3079 /// Parses binary expressions by inserting fake parenthesis based on
3080 /// operator precedence.
3081 class ExpressionParser {
3082 public:
/// Creates a parser for \p Line. The style, keyword table and line are kept
/// by reference, and parsing starts at the first token of the line.
3083 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
3084 AnnotatedLine &Line)
3085 : Style(Style), Keywords(Keywords), Line(Line), Current(Line.First) {}
3087 /// Parse expressions with the given operator precedence.
///
/// The function recurses with \c Precedence + 1 to consume tighter-binding
/// operators first, records operators of exactly \p Precedence via
/// \c NextOperator / \c OperatorIndex, and finally wraps the parsed range in
/// fake parentheses of that precedence.
///
/// \param Precedence The precedence level to parse at. Levels above
/// \c PrecedenceArrowAndPeriod return immediately; \c prec::Conditional and
/// \c PrecedenceUnaryOperator are delegated to dedicated helpers.
3088 void parse(int Precedence = 0) {
3089 // Skip 'return' and ObjC selector colons as they are not part of a binary
3090 // expression.
3091 while (Current && (Current->is(tok::kw_return) ||
3092 (Current->is(tok::colon) &&
3093 Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) {
3094 next();
3097 if (!Current || Precedence > PrecedenceArrowAndPeriod)
3098 return;
3100 // Conditional expressions need to be parsed separately for proper nesting.
3101 if (Precedence == prec::Conditional) {
3102 parseConditionalExpr();
3103 return;
3106 // Parse unary operators, which all have a higher precedence than binary
3107 // operators.
3108 if (Precedence == PrecedenceUnaryOperator) {
3109 parseUnaryOperator();
3110 return;
3113 FormatToken *Start = Current;
3114 FormatToken *LatestOperator = nullptr;
3115 unsigned OperatorIndex = 0;
3116 // The first name of the current type in a port list.
3117 FormatToken *VerilogFirstOfType = nullptr;
3119 while (Current) {
3120 // In Verilog ports in a module header that don't have a type take the
3121 // type of the previous one. For example,
3122 // module a(output b,
3123 // c,
3124 // output d);
3125 // In this case there need to be fake parentheses around b and c.
3126 if (Style.isVerilog() && Precedence == prec::Comma) {
3127 VerilogFirstOfType =
3128 verilogGroupDecl(VerilogFirstOfType, LatestOperator);
3131 // Consume operators with higher precedence.
3132 parse(Precedence + 1);
3134 int CurrentPrecedence = getCurrentPrecedence();
// A selector name at this precedence starts a new group: close the previous
// group (if it contained an operator) and restart from the selector.
3136 if (Precedence == CurrentPrecedence && Current &&
3137 Current->is(TT_SelectorName)) {
3138 if (LatestOperator)
3139 addFakeParenthesis(Start, prec::Level(Precedence));
3140 Start = Current;
3143 if ((Style.isCSharp() || Style.isJavaScript() ||
3144 Style.Language == FormatStyle::LK_Java) &&
3145 Precedence == prec::Additive && Current) {
3146 // A string can be broken without parentheses around it when it is
3147 // already in a sequence of strings joined by `+` signs.
3148 FormatToken *Prev = Current->getPreviousNonComment();
3149 if (Prev && Prev->is(tok::string_literal) &&
3150 (Prev == Start || Prev->endsSequence(tok::string_literal, tok::plus,
3151 TT_StringInConcatenation))) {
3152 Prev->setType(TT_StringInConcatenation);
3156 // At the end of the line or when an operator with lower precedence is
3157 // found, insert fake parenthesis and return.
3158 if (!Current ||
3159 (Current->closesScope() &&
3160 (Current->MatchingParen || Current->is(TT_TemplateString))) ||
3161 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
3162 (CurrentPrecedence == prec::Conditional &&
3163 Precedence == prec::Assignment && Current->is(tok::colon))) {
3164 break;
3167 // Consume scopes: (), [], <> and {}
3168 // In addition to that we handle require clauses as scope, so that the
3169 // constraints in that are correctly indented.
3170 if (Current->opensScope() ||
3171 Current->isOneOf(TT_RequiresClause,
3172 TT_RequiresClauseInARequiresExpression)) {
3173 // A fragment of a JavaScript template string can look like '}..${' and
3174 // thus close a scope and open a new one at the same time.
3175 while (Current && (!Current->closesScope() || Current->opensScope())) {
3176 next();
3177 parse();
3179 next();
3180 } else {
3181 // Operator found.
3182 if (CurrentPrecedence == Precedence) {
3183 if (LatestOperator)
3184 LatestOperator->NextOperator = Current;
3185 LatestOperator = Current;
3186 Current->OperatorIndex = OperatorIndex;
3187 ++OperatorIndex;
// At precedence 0 comments that start on a new line are not skipped.
3189 next(/*SkipPastLeadingComments=*/Precedence > 0);
3193 // Group variables of the same type.
3194 if (Style.isVerilog() && Precedence == prec::Comma && VerilogFirstOfType)
3195 addFakeParenthesis(VerilogFirstOfType, prec::Comma);
3197 if (LatestOperator && (Current || Precedence > 0)) {
3198 // The requires clauses do not necessarily end in a semicolon or a brace,
3199 // but just go over to struct/class or a function declaration, we need to
3200 // intervene so that the fake right paren is inserted correctly.
3201 auto End =
3202 (Start->Previous &&
3203 Start->Previous->isOneOf(TT_RequiresClause,
3204 TT_RequiresClauseInARequiresExpression))
3205 ? [this]() {
3206 auto Ret = Current ? Current : Line.Last;
3207 while (!Ret->ClosesRequiresClause && Ret->Previous)
3208 Ret = Ret->Previous;
3209 return Ret;
3211 : nullptr;
3213 if (Precedence == PrecedenceArrowAndPeriod) {
3214 // Call expressions don't have a binary operator precedence.
3215 addFakeParenthesis(Start, prec::Unknown, End);
3216 } else {
3217 addFakeParenthesis(Start, prec::Level(Precedence), End);
3222 private:
3223 /// Gets the precedence (+1) of the given token for binary operators
3224 /// and other tokens that we treat like binary operators.
3225 int getCurrentPrecedence() {
3226 if (Current) {
3227 const FormatToken *NextNonComment = Current->getNextNonComment();
3228 if (Current->is(TT_ConditionalExpr))
3229 return prec::Conditional;
3230 if (NextNonComment && Current->is(TT_SelectorName) &&
3231 (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) ||
3232 (Style.isProto() && NextNonComment->is(tok::less)))) {
3233 return prec::Assignment;
3235 if (Current->is(TT_JsComputedPropertyName))
3236 return prec::Assignment;
3237 if (Current->is(TT_TrailingReturnArrow))
3238 return prec::Comma;
3239 if (Current->is(TT_FatArrow))
3240 return prec::Assignment;
3241 if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) ||
3242 (Current->is(tok::comment) && NextNonComment &&
3243 NextNonComment->is(TT_SelectorName))) {
3244 return 0;
3246 if (Current->is(TT_RangeBasedForLoopColon))
3247 return prec::Comma;
3248 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3249 Current->is(Keywords.kw_instanceof)) {
3250 return prec::Relational;
3252 if (Style.isJavaScript() &&
3253 Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) {
3254 return prec::Relational;
3256 if (Current->is(TT_BinaryOperator) || Current->is(tok::comma))
3257 return Current->getPrecedence();
3258 if (Current->isOneOf(tok::period, tok::arrow) &&
3259 Current->isNot(TT_TrailingReturnArrow)) {
3260 return PrecedenceArrowAndPeriod;
3262 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3263 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements,
3264 Keywords.kw_throws)) {
3265 return 0;
3267 // In Verilog case labels are not on separate lines straight out of
3268 // UnwrappedLineParser. The colon is not part of an expression.
3269 if (Style.isVerilog() && Current->is(tok::colon))
3270 return 0;
3272 return -1;
3275 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence,
3276 FormatToken *End = nullptr) {
3277 // Do not assign fake parenthesis to tokens that are part of an
3278 // unexpanded macro call. The line within the macro call contains
3279 // the parenthesis and commas, and we will not find operators within
3280 // that structure.
3281 if (Start->MacroParent)
3282 return;
3284 Start->FakeLParens.push_back(Precedence);
3285 if (Precedence > prec::Unknown)
3286 Start->StartsBinaryExpression = true;
3287 if (!End && Current)
3288 End = Current->getPreviousNonComment();
3289 if (End) {
3290 ++End->FakeRParens;
3291 if (Precedence > prec::Unknown)
3292 End->EndsBinaryExpression = true;
3296 /// Parse unary operator expressions and surround them with fake
3297 /// parentheses if appropriate.
3298 void parseUnaryOperator() {
3299 llvm::SmallVector<FormatToken *, 2> Tokens;
3300 while (Current && Current->is(TT_UnaryOperator)) {
3301 Tokens.push_back(Current);
3302 next();
3304 parse(PrecedenceArrowAndPeriod);
3305 for (FormatToken *Token : llvm::reverse(Tokens)) {
3306 // The actual precedence doesn't matter.
3307 addFakeParenthesis(Token, prec::Unknown);
3311 void parseConditionalExpr() {
3312 while (Current && Current->isTrailingComment())
3313 next();
3314 FormatToken *Start = Current;
3315 parse(prec::LogicalOr);
3316 if (!Current || Current->isNot(tok::question))
3317 return;
3318 next();
3319 parse(prec::Assignment);
3320 if (!Current || Current->isNot(TT_ConditionalExpr))
3321 return;
3322 next();
3323 parse(prec::Assignment);
3324 addFakeParenthesis(Start, prec::Conditional);
3327 void next(bool SkipPastLeadingComments = true) {
3328 if (Current)
3329 Current = Current->Next;
3330 while (Current &&
3331 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
3332 Current->isTrailingComment()) {
3333 Current = Current->Next;
3337 // Add fake parenthesis around declarations of the same type for example in a
3338 // module prototype. Return the first port / variable of the current type.
3339 FormatToken *verilogGroupDecl(FormatToken *FirstOfType,
3340 FormatToken *PreviousComma) {
3341 if (!Current)
3342 return nullptr;
3344 FormatToken *Start = Current;
3346 // Skip attributes.
3347 while (Start->startsSequence(tok::l_paren, tok::star)) {
3348 if (!(Start = Start->MatchingParen) ||
3349 !(Start = Start->getNextNonComment())) {
3350 return nullptr;
3354 FormatToken *Tok = Start;
3356 if (Tok->is(Keywords.kw_assign))
3357 Tok = Tok->getNextNonComment();
3359 // Skip any type qualifiers to find the first identifier. It may be either a
3360 // new type name or a variable name. There can be several type qualifiers
3361 // preceding a variable name, and we can not tell them apart by looking at
3362 // the word alone since a macro can be defined as either a type qualifier or
3363 // a variable name. Thus we use the last word before the dimensions instead
3364 // of the first word as the candidate for the variable or type name.
3365 FormatToken *First = nullptr;
3366 while (Tok) {
3367 FormatToken *Next = Tok->getNextNonComment();
3369 if (Tok->is(tok::hash)) {
3370 // Start of a macro expansion.
3371 First = Tok;
3372 Tok = Next;
3373 if (Tok)
3374 Tok = Tok->getNextNonComment();
3375 } else if (Tok->is(tok::hashhash)) {
3376 // Concatenation. Skip.
3377 Tok = Next;
3378 if (Tok)
3379 Tok = Tok->getNextNonComment();
3380 } else if (Keywords.isVerilogQualifier(*Tok) ||
3381 Keywords.isVerilogIdentifier(*Tok)) {
3382 First = Tok;
3383 Tok = Next;
3384 // The name may have dots like `interface_foo.modport_foo`.
3385 while (Tok && Tok->isOneOf(tok::period, tok::coloncolon) &&
3386 (Tok = Tok->getNextNonComment())) {
3387 if (Keywords.isVerilogIdentifier(*Tok))
3388 Tok = Tok->getNextNonComment();
3390 } else if (!Next) {
3391 Tok = nullptr;
3392 } else if (Tok->is(tok::l_paren)) {
3393 // Make sure the parenthesized list is a drive strength. Otherwise the
3394 // statement may be a module instantiation in which case we have already
3395 // found the instance name.
3396 if (Next->isOneOf(
3397 Keywords.kw_highz0, Keywords.kw_highz1, Keywords.kw_large,
3398 Keywords.kw_medium, Keywords.kw_pull0, Keywords.kw_pull1,
3399 Keywords.kw_small, Keywords.kw_strong0, Keywords.kw_strong1,
3400 Keywords.kw_supply0, Keywords.kw_supply1, Keywords.kw_weak0,
3401 Keywords.kw_weak1)) {
3402 Tok->setType(TT_VerilogStrength);
3403 Tok = Tok->MatchingParen;
3404 if (Tok) {
3405 Tok->setType(TT_VerilogStrength);
3406 Tok = Tok->getNextNonComment();
3408 } else {
3409 break;
3411 } else if (Tok->is(tok::hash)) {
3412 if (Next->is(tok::l_paren))
3413 Next = Next->MatchingParen;
3414 if (Next)
3415 Tok = Next->getNextNonComment();
3416 } else {
3417 break;
3421 // Find the second identifier. If it exists it will be the name.
3422 FormatToken *Second = nullptr;
3423 // Dimensions.
3424 while (Tok && Tok->is(tok::l_square) && (Tok = Tok->MatchingParen))
3425 Tok = Tok->getNextNonComment();
3426 if (Tok && (Tok->is(tok::hash) || Keywords.isVerilogIdentifier(*Tok)))
3427 Second = Tok;
3429 // If the second identifier doesn't exist and there are qualifiers, the type
3430 // is implied.
3431 FormatToken *TypedName = nullptr;
3432 if (Second) {
3433 TypedName = Second;
3434 if (First && First->is(TT_Unknown))
3435 First->setType(TT_VerilogDimensionedTypeName);
3436 } else if (First != Start) {
3437 // If 'First' is null, then this isn't a declaration, 'TypedName' gets set
3438 // to null as intended.
3439 TypedName = First;
3442 if (TypedName) {
3443 // This is a declaration with a new type.
3444 if (TypedName->is(TT_Unknown))
3445 TypedName->setType(TT_StartOfName);
3446 // Group variables of the previous type.
3447 if (FirstOfType && PreviousComma) {
3448 PreviousComma->setType(TT_VerilogTypeComma);
3449 addFakeParenthesis(FirstOfType, prec::Comma, PreviousComma->Previous);
3452 FirstOfType = TypedName;
3454 // Don't let higher precedence handle the qualifiers. For example if we
3455 // have:
3456 // parameter x = 0
3457 // We skip `parameter` here. This way the fake parentheses for the
3458 // assignment will be around `x = 0`.
3459 while (Current && Current != FirstOfType) {
3460 if (Current->opensScope()) {
3461 next();
3462 parse();
3464 next();
3468 return FirstOfType;
3471 const FormatStyle &Style;
3472 const AdditionalKeywords &Keywords;
3473 const AnnotatedLine &Line;
3474 FormatToken *Current;
3477 } // end anonymous namespace
3479 void TokenAnnotator::setCommentLineLevels(
3480 SmallVectorImpl<AnnotatedLine *> &Lines) const {
3481 const AnnotatedLine *NextNonCommentLine = nullptr;
3482 for (AnnotatedLine *Line : llvm::reverse(Lines)) {
3483 assert(Line->First);
3485 // If the comment is currently aligned with the line immediately following
3486 // it, that's probably intentional and we should keep it.
3487 if (NextNonCommentLine && NextNonCommentLine->First->NewlinesBefore < 2 &&
3488 Line->isComment() && !isClangFormatOff(Line->First->TokenText) &&
3489 NextNonCommentLine->First->OriginalColumn ==
3490 Line->First->OriginalColumn) {
3491 const bool PPDirectiveOrImportStmt =
3492 NextNonCommentLine->Type == LT_PreprocessorDirective ||
3493 NextNonCommentLine->Type == LT_ImportStatement;
3494 if (PPDirectiveOrImportStmt)
3495 Line->Type = LT_CommentAbovePPDirective;
3496 // Align comments for preprocessor lines with the # in column 0 if
3497 // preprocessor lines are not indented. Otherwise, align with the next
3498 // line.
3499 Line->Level = Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
3500 PPDirectiveOrImportStmt
3502 : NextNonCommentLine->Level;
3503 } else {
3504 NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
3507 setCommentLineLevels(Line->Children);
3511 static unsigned maxNestingDepth(const AnnotatedLine &Line) {
3512 unsigned Result = 0;
3513 for (const auto *Tok = Line.First; Tok; Tok = Tok->Next)
3514 Result = std::max(Result, Tok->NestingLevel);
3515 return Result;
3518 // Returns the name of a function with no return type, e.g. a constructor or
3519 // destructor.
3520 static FormatToken *getFunctionName(const AnnotatedLine &Line) {
3521 for (FormatToken *Tok = Line.getFirstNonComment(), *Name = nullptr; Tok;
3522 Tok = Tok->getNextNonComment()) {
3523 // Skip C++11 attributes both before and after the function name.
3524 if (Tok->is(tok::l_square) && Tok->is(TT_AttributeSquare)) {
3525 Tok = Tok->MatchingParen;
3526 if (!Tok)
3527 break;
3528 continue;
3531 // Make sure the name is followed by a pair of parentheses.
3532 if (Name) {
3533 return Tok->is(tok::l_paren) && Tok->isNot(TT_FunctionTypeLParen) &&
3534 Tok->MatchingParen
3535 ? Name
3536 : nullptr;
3539 // Skip keywords that may precede the constructor/destructor name.
3540 if (Tok->isOneOf(tok::kw_friend, tok::kw_inline, tok::kw_virtual,
3541 tok::kw_constexpr, tok::kw_consteval, tok::kw_explicit)) {
3542 continue;
3545 // A qualified name may start from the global namespace.
3546 if (Tok->is(tok::coloncolon)) {
3547 Tok = Tok->Next;
3548 if (!Tok)
3549 break;
3552 // Skip to the unqualified part of the name.
3553 while (Tok->startsSequence(tok::identifier, tok::coloncolon)) {
3554 assert(Tok->Next);
3555 Tok = Tok->Next->Next;
3556 if (!Tok)
3557 return nullptr;
3560 // Skip the `~` if a destructor name.
3561 if (Tok->is(tok::tilde)) {
3562 Tok = Tok->Next;
3563 if (!Tok)
3564 break;
3567 // Make sure the name is not already annotated, e.g. as NamespaceMacro.
3568 if (Tok->isNot(tok::identifier) || Tok->isNot(TT_Unknown))
3569 break;
3571 Name = Tok;
3574 return nullptr;
3577 // Checks if Tok is a constructor/destructor name qualified by its class name.
3578 static bool isCtorOrDtorName(const FormatToken *Tok) {
3579 assert(Tok && Tok->is(tok::identifier));
3580 const auto *Prev = Tok->Previous;
3582 if (Prev && Prev->is(tok::tilde))
3583 Prev = Prev->Previous;
3585 if (!Prev || !Prev->endsSequence(tok::coloncolon, tok::identifier))
3586 return false;
3588 assert(Prev->Previous);
3589 return Prev->Previous->TokenText == Tok->TokenText;
3592 void TokenAnnotator::annotate(AnnotatedLine &Line) {
3593 AnnotatingParser Parser(Style, Line, Keywords, Scopes);
3594 Line.Type = Parser.parseLine();
3596 for (auto &Child : Line.Children)
3597 annotate(*Child);
3599 // With very deep nesting, ExpressionParser uses lots of stack and the
3600 // formatting algorithm is very slow. We're not going to do a good job here
3601 // anyway - it's probably generated code being formatted by mistake.
3602 // Just skip the whole line.
3603 if (maxNestingDepth(Line) > 50)
3604 Line.Type = LT_Invalid;
3606 if (Line.Type == LT_Invalid)
3607 return;
3609 ExpressionParser ExprParser(Style, Keywords, Line);
3610 ExprParser.parse();
3612 if (IsCpp) {
3613 auto *Tok = getFunctionName(Line);
3614 if (Tok && ((!Scopes.empty() && Scopes.back() == ST_Class) ||
3615 Line.endsWith(TT_FunctionLBrace) || isCtorOrDtorName(Tok))) {
3616 Tok->setFinalizedType(TT_CtorDtorDeclName);
3620 if (Line.startsWith(TT_ObjCMethodSpecifier))
3621 Line.Type = LT_ObjCMethodDecl;
3622 else if (Line.startsWith(TT_ObjCDecl))
3623 Line.Type = LT_ObjCDecl;
3624 else if (Line.startsWith(TT_ObjCProperty))
3625 Line.Type = LT_ObjCProperty;
3627 auto *First = Line.First;
3628 First->SpacesRequiredBefore = 1;
3629 First->CanBreakBefore = First->MustBreakBefore;
3631 if (First->is(tok::eof) && First->NewlinesBefore == 0 &&
3632 Style.InsertNewlineAtEOF) {
3633 First->NewlinesBefore = 1;
3637 // This function heuristically determines whether 'Current' starts the name of a
3638 // function declaration.
//
// IsCpp is forwarded to the type-name and attribute checks below. Line is the
// line containing Current; only its last token is inspected.
//
// ClosingParen is an output parameter: once a parenthesized parameter list
// with a matching ')' has been found it is set to that ')'. Note that this
// assignment happens before the final plausibility checks, so ClosingParen
// may have been written even when the function goes on to return false.
3639 static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
3640 const AnnotatedLine &Line,
3641 FormatToken *&ClosingParen) {
3642 assert(Current.Previous);
// Already annotated as a function declaration name: nothing more to decide.
3644 if (Current.is(TT_FunctionDeclarationName))
3645 return true;
// Tokens without identifier info cannot start a function name.
3647 if (!Current.Tok.getIdentifierInfo())
3648 return false;
3650 const auto &Previous = *Current.Previous;
// Reject names whose second predecessor is an Objective-C declaration token.
3652 if (const auto *PrevPrev = Previous.Previous;
3653 PrevPrev && PrevPrev->is(TT_ObjCDecl)) {
3654 return false;
// Helper: starting at the token after `operator`, walks over the tokens that
// can make up an operator name and returns the TT_OverloadedOperatorLParen
// token that opens the parameter list. Returns nullptr if the walk stops on
// any other token or reaches the end of the token list.
3657 auto skipOperatorName =
3658 [IsCpp](const FormatToken *Next) -> const FormatToken * {
3659 for (; Next; Next = Next->Next) {
3660 if (Next->is(TT_OverloadedOperatorLParen))
3661 return Next;
3662 if (Next->is(TT_OverloadedOperator))
3663 continue;
3664 if (Next->isOneOf(tok::kw_new, tok::kw_delete)) {
3665 // For 'new[]' and 'delete[]'.
3666 if (Next->Next &&
3667 Next->Next->startsSequence(tok::l_square, tok::r_square)) {
3668 Next = Next->Next->Next;
3670 continue;
3672 if (Next->startsSequence(tok::l_square, tok::r_square)) {
3673 // For operator[]().
3674 Next = Next->Next;
3675 continue;
3677 if ((Next->isTypeName(IsCpp) || Next->is(tok::identifier)) &&
3678 Next->Next && Next->Next->isPointerOrReference()) {
3679 // For operator void*(), operator char*(), operator Foo*().
3680 Next = Next->Next;
3681 continue;
// Skip a whole template argument list in one step.
3683 if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
3684 Next = Next->MatchingParen;
3685 continue;
3688 break;
3690 return nullptr;
3693 // Find parentheses of parameter list.
3694 const FormatToken *Next = Current.Next;
3695 if (Current.is(tok::kw_operator)) {
// `operator` directly after an identifier-like token (other than `return`
// and `co_return`) is accepted as a declaration name right away.
3696 if (Previous.Tok.getIdentifierInfo() &&
3697 !Previous.isOneOf(tok::kw_return, tok::kw_co_return)) {
3698 return true;
// Likewise when it follows the ')' of a type declaration paren.
3700 if (Previous.is(tok::r_paren) && Previous.is(TT_TypeDeclarationParen)) {
3701 assert(Previous.MatchingParen);
3702 assert(Previous.MatchingParen->is(tok::l_paren));
3703 assert(Previous.MatchingParen->is(TT_TypeDeclarationParen));
3704 return true;
// Otherwise only continue if it follows a pointer/reference token or a
// template closer.
3706 if (!Previous.isPointerOrReference() && Previous.isNot(TT_TemplateCloser))
3707 return false;
3708 Next = skipOperatorName(Next);
3709 } else {
// A non-operator name must be a TT_StartOfName at nesting level 0.
3710 if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0)
3711 return false;
// Scan forward to the '(' of the parameter list, stepping over template
// argument lists, `::`-qualified name parts and attributes. Any other token
// means this is not a function declaration name.
3712 for (; Next; Next = Next->Next) {
3713 if (Next->is(TT_TemplateOpener) && Next->MatchingParen) {
3714 Next = Next->MatchingParen;
3715 } else if (Next->is(tok::coloncolon)) {
3716 Next = Next->Next;
3717 if (!Next)
3718 return false;
3719 if (Next->is(tok::kw_operator)) {
3720 Next = skipOperatorName(Next->Next);
3721 break;
3723 if (Next->isNot(tok::identifier))
3724 return false;
3725 } else if (isCppAttribute(IsCpp, *Next)) {
3726 Next = Next->MatchingParen;
3727 if (!Next)
3728 return false;
3729 } else if (Next->is(tok::l_paren)) {
3730 break;
3731 } else {
3732 return false;
3737 // Check whether parameter list can belong to a function declaration.
3738 if (!Next || Next->isNot(tok::l_paren) || !Next->MatchingParen)
3739 return false;
3740 ClosingParen = Next->MatchingParen;
3741 assert(ClosingParen->is(tok::r_paren));
3742 // If the lines ends with "{", this is likely a function definition.
3743 if (Line.Last->is(tok::l_brace))
3744 return true;
3745 if (Next->Next == ClosingParen)
3746 return true; // Empty parentheses.
3747 // If there is an &/&& after the r_paren, this is likely a function.
3748 if (ClosingParen->Next && ClosingParen->Next->is(TT_PointerOrReference))
3749 return true;
3751 // Check for K&R C function definitions (and C++ function definitions with
3752 // unnamed parameters), e.g.:
3753 // int f(i)
3754 // {
3755 // return i + 1;
3756 // }
3757 // bool g(size_t = 0, bool b = false)
3758 // {
3759 // return !b;
3760 // }
3761 if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
3762 !Line.endsWith(tok::semi)) {
3763 return true;
// Inspect the tokens between the parentheses: the first token that looks
// like part of a parameter declaration decides in favor of a declaration,
// while a '{', an Objective-C method expression or a literal decides
// against it. Nested parentheses and template argument lists are skipped.
3766 for (const FormatToken *Tok = Next->Next; Tok && Tok != ClosingParen;
3767 Tok = Tok->Next) {
3768 if (Tok->is(TT_TypeDeclarationParen))
3769 return true;
3770 if (Tok->isOneOf(tok::l_paren, TT_TemplateOpener) && Tok->MatchingParen) {
3771 Tok = Tok->MatchingParen;
3772 continue;
3774 if (Tok->is(tok::kw_const) || Tok->isTypeName(IsCpp) ||
3775 Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) {
3776 return true;
3778 if (Tok->isOneOf(tok::l_brace, TT_ObjCMethodExpr) || Tok->Tok.isLiteral())
3779 return false;
// Nothing inside the parentheses looked like a parameter declaration.
3781 return false;
3784 bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const {
3785 assert(Line.MightBeFunctionDecl);
3787 if ((Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevel ||
3788 Style.BreakAfterReturnType == FormatStyle::RTBS_TopLevelDefinitions) &&
3789 Line.Level > 0) {
3790 return false;
3793 switch (Style.BreakAfterReturnType) {
3794 case FormatStyle::RTBS_None:
3795 case FormatStyle::RTBS_Automatic:
3796 case FormatStyle::RTBS_ExceptShortType:
3797 return false;
3798 case FormatStyle::RTBS_All:
3799 case FormatStyle::RTBS_TopLevel:
3800 return true;
3801 case FormatStyle::RTBS_AllDefinitions:
3802 case FormatStyle::RTBS_TopLevelDefinitions:
3803 return Line.mightBeFunctionDefinition();
3806 return false;
// Computes the per-token formatting information for Line, after doing the
// same for all of its child lines: it finalizes function-declaration-name
// annotations, and for every token after the first fills in
// SpacesRequiredBefore, MustBreakBefore, CanBreakBefore, TotalLength and
// SplitPenalty. Finally it computes the unbreakable tail lengths and the
// IndentLevel of every token.
3809 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const {
3810 for (AnnotatedLine *ChildLine : Line.Children)
3811 calculateFormattingInformation(*ChildLine);
// The first token's length: the column limit if it spans several lines,
// otherwise the start column of the line plus the token's width.
3813 auto *First = Line.First;
3814 First->TotalLength = First->IsMultiline
3815 ? Style.ColumnLimit
3816 : Line.FirstStartColumn + First->ColumnWidth;
3817 FormatToken *Current = First->Next;
3818 bool InFunctionDecl = Line.MightBeFunctionDecl;
3819 bool AlignArrayOfStructures =
3820 (Style.AlignArrayOfStructures != FormatStyle::AIAS_None &&
3821 Line.Type == LT_ArrayOfStructInitializer);
3822 if (AlignArrayOfStructures)
3823 calculateArrayInitializerColumnList(Line);
// State gathered by the scan below:
//   SeenName                  - a TT_StartOfName or a declaration name was seen
//   LineIsFunctionDeclaration - a ctor/dtor or function declaration name was found
//   ClosingParen              - set by isFunctionDeclarationName() to the ')' of
//                               the parameter list
//   AfterLastAttribute        - last token that directly follows a token with
//                               EndsCppAttributeGroup set
3825 bool SeenName = false;
3826 bool LineIsFunctionDeclaration = false;
3827 FormatToken *ClosingParen = nullptr;
3828 FormatToken *AfterLastAttribute = nullptr;
// Find the first token that names the declared function. Names that are not
// already TT_CtorDtorDeclName are finalized as TT_FunctionDeclarationName.
3830 for (auto *Tok = Current; Tok; Tok = Tok->Next) {
3831 if (Tok->is(TT_StartOfName))
3832 SeenName = true;
3833 if (Tok->Previous->EndsCppAttributeGroup)
3834 AfterLastAttribute = Tok;
3835 if (const bool IsCtorOrDtor = Tok->is(TT_CtorDtorDeclName);
3836 IsCtorOrDtor ||
3837 isFunctionDeclarationName(IsCpp, *Tok, Line, ClosingParen)) {
3838 if (!IsCtorOrDtor)
3839 Tok->setFinalizedType(TT_FunctionDeclarationName);
3840 LineIsFunctionDeclaration = true;
3841 SeenName = true;
3842 break;
// A C++ function declaration line that ends in "};": walk back from the token
// before the ';' to the '}' and, if it has a matching '{', mark both braces as
// a block and the '{' as TT_FunctionLBrace.
3846 if (IsCpp && (LineIsFunctionDeclaration || First->is(TT_CtorDtorDeclName)) &&
3847 Line.endsWith(tok::semi, tok::r_brace)) {
3848 auto *Tok = Line.Last->Previous;
3849 while (Tok->isNot(tok::r_brace))
3850 Tok = Tok->Previous;
3851 if (auto *LBrace = Tok->MatchingParen; LBrace) {
3852 assert(LBrace->is(tok::l_brace));
3853 Tok->setBlockKind(BK_Block);
3854 LBrace->setBlockKind(BK_Block);
3855 LBrace->setFinalizedType(TT_FunctionLBrace);
// Force a break after the last attribute group when the style requires it
// (see mustBreakAfterAttributes); for function declarations this is recorded
// as a wrapped return type.
3859 if (IsCpp && SeenName && AfterLastAttribute &&
3860 mustBreakAfterAttributes(*AfterLastAttribute, Style)) {
3861 AfterLastAttribute->MustBreakBefore = true;
3862 if (LineIsFunctionDeclaration)
3863 Line.ReturnTypeWrapped = true;
3866 if (IsCpp) {
3867 if (!LineIsFunctionDeclaration) {
3868 // Annotate */&/&& in `operator` function calls as binary operators.
3869 for (const auto *Tok = First; Tok; Tok = Tok->Next) {
3870 if (Tok->isNot(tok::kw_operator))
3871 continue;
// Advance to the '(' of the overloaded operator call.
3872 do {
3873 Tok = Tok->Next;
3874 } while (Tok && Tok->isNot(TT_OverloadedOperatorLParen));
3875 if (!Tok || !Tok->MatchingParen)
3876 break;
3877 const auto *LeftParen = Tok;
// Inside the parentheses, `identifier */&/&& identifier` is a binary expression.
3878 for (Tok = Tok->Next; Tok && Tok != LeftParen->MatchingParen;
3879 Tok = Tok->Next) {
3880 if (Tok->isNot(tok::identifier))
3881 continue;
3882 auto *Next = Tok->Next;
3883 const bool NextIsBinaryOperator =
3884 Next && Next->isPointerOrReference() && Next->Next &&
3885 Next->Next->is(tok::identifier);
3886 if (!NextIsBinaryOperator)
3887 continue;
3888 Next->setType(TT_BinaryOperator);
3889 Tok = Next;
3892 } else if (ClosingParen) {
// For function declarations, look after the parameter list for a `->` and
// mark it as the trailing return arrow. The search stops at a constructor
// initializer colon; parenthesized arguments of trailing annotations are
// skipped.
3893 for (auto *Tok = ClosingParen->Next; Tok; Tok = Tok->Next) {
3894 if (Tok->is(TT_CtorInitializerColon))
3895 break;
3896 if (Tok->is(tok::arrow)) {
3897 Tok->setType(TT_TrailingReturnArrow);
3898 break;
3900 if (Tok->isNot(TT_TrailingAnnotation))
3901 continue;
3902 const auto *Next = Tok->Next;
3903 if (!Next || Next->isNot(tok::l_paren))
3904 continue;
3905 Tok = Next->MatchingParen;
3906 if (!Tok)
3907 break;
// Main pass: compute spacing, break and length information for every token
// after the first one.
3912 while (Current) {
3913 const FormatToken *Prev = Current->Previous;
3914 if (Current->is(TT_LineComment)) {
3915 if (Prev->is(BK_BracedInit) && Prev->opensScope()) {
3916 Current->SpacesRequiredBefore =
3917 (Style.Cpp11BracedListStyle && !Style.SpacesInParensOptions.Other)
// NOTE(review): the '?' arm of this conditional is not present in this copy
// (a line appears to be missing between the condition above and ': 1'
// below) - confirm against the upstream file.
3919 : 1;
3920 } else if (Prev->is(TT_VerilogMultiLineListLParen)) {
3921 Current->SpacesRequiredBefore = 0;
3922 } else {
3923 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
3926 // If we find a trailing comment, iterate backwards to determine whether
3927 // it seems to relate to a specific parameter. If so, break before that
3928 // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
3929 // to the previous line in:
3930 // SomeFunction(a,
3931 // b, // comment
3932 // c);
3933 if (!Current->HasUnescapedNewline) {
3934 for (FormatToken *Parameter = Current->Previous; Parameter;
3935 Parameter = Parameter->Previous) {
3936 if (Parameter->isOneOf(tok::comment, tok::r_brace))
3937 break;
3938 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
3939 if (Parameter->Previous->isNot(TT_CtorInitializerComma) &&
3940 Parameter->HasUnescapedNewline) {
3941 Parameter->MustBreakBefore = true;
3943 break;
// Non-comment tokens that are not finalized get one space when
// spaceRequiredBefore() asks for it and none has been requested yet.
3947 } else if (!Current->Finalized && Current->SpacesRequiredBefore == 0 &&
3948 spaceRequiredBefore(Line, *Current)) {
3949 Current->SpacesRequiredBefore = 1;
// A child line of the previous token that ends in a line comment forces a
// break before the current token.
3952 const auto &Children = Prev->Children;
3953 if (!Children.empty() && Children.back()->Last->is(TT_LineComment)) {
3954 Current->MustBreakBefore = true;
3955 } else {
3956 Current->MustBreakBefore =
3957 Current->MustBreakBefore || mustBreakBefore(Line, *Current);
3958 if (!Current->MustBreakBefore && InFunctionDecl &&
3959 Current->is(TT_FunctionDeclarationName)) {
3960 Current->MustBreakBefore = mustBreakForReturnType(Line);
3964 Current->CanBreakBefore =
3965 Current->MustBreakBefore || canBreakBefore(Line, *Current);
// Width contributed by a single child line of the previous token: the column
// limit if that child ends in a trailing comment, otherwise its length plus one.
3966 unsigned ChildSize = 0;
3967 if (Prev->Children.size() == 1) {
3968 FormatToken &LastOfChild = *Prev->Children[0]->Last;
3969 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
3970 : LastOfChild.TotalLength + 1;
// When a break is unavoidable here (forced break, several children, a child
// that must start on its own line, or a multi-line token), a full column
// limit is added to the running length; otherwise the token's own width,
// spacing and child size are added.
3972 if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
3973 (Prev->Children.size() == 1 &&
3974 Prev->Children[0]->First->MustBreakBefore) ||
3975 Current->IsMultiline) {
3976 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
3977 } else {
3978 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
3979 ChildSize + Current->SpacesRequiredBefore;
// Tokens from the constructor initializer colon onwards are no longer treated
// as part of the function declaration.
3982 if (Current->is(TT_CtorInitializerColon))
3983 InFunctionDecl = false;
3985 // FIXME: Only calculate this if CanBreakBefore is true once static
3986 // initializers etc. are sorted out.
3987 // FIXME: Move magic numbers to a better place.
3989 // Reduce penalty for aligning ObjC method arguments using the colon
3990 // alignment as this is the canonical way (still prefer fitting everything
3991 // into one line if possible). Trying to fit a whole expression into one
3992 // line should not force other line breaks (e.g. when ObjC method
3993 // expression is a part of other expression).
3994 Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl);
3995 if (Style.Language == FormatStyle::LK_ObjC &&
3996 Current->is(TT_SelectorName) && Current->ParameterIndex > 0) {
3997 if (Current->ParameterIndex == 1)
3998 Current->SplitPenalty += 5 * Current->BindingStrength;
3999 } else {
4000 Current->SplitPenalty += 20 * Current->BindingStrength;
4003 Current = Current->Next;
// Final pass over all tokens: let token roles precompute their data and
// assign indent levels. The level is decremented (never below zero) at a
// token whose matching token opens a block or block-type list, and
// incremented after a token that opens one.
4006 calculateUnbreakableTailLengths(Line);
4007 unsigned IndentLevel = Line.Level;
4008 for (Current = First; Current; Current = Current->Next) {
4009 if (Current->Role)
4010 Current->Role->precomputeFormattingInfos(Current);
4011 if (Current->MatchingParen &&
4012 Current->MatchingParen->opensBlockOrBlockTypeList(Style) &&
4013 IndentLevel > 0) {
4014 --IndentLevel;
4016 Current->IndentLevel = IndentLevel;
4017 if (Current->opensBlockOrBlockTypeList(Style))
4018 ++IndentLevel;
4021 LLVM_DEBUG({ printDebugInfo(Line); });
4024 void TokenAnnotator::calculateUnbreakableTailLengths(
4025 AnnotatedLine &Line) const {
4026 unsigned UnbreakableTailLength = 0;
4027 FormatToken *Current = Line.Last;
4028 while (Current) {
4029 Current->UnbreakableTailLength = UnbreakableTailLength;
4030 if (Current->CanBreakBefore ||
4031 Current->isOneOf(tok::comment, tok::string_literal)) {
4032 UnbreakableTailLength = 0;
4033 } else {
4034 UnbreakableTailLength +=
4035 Current->ColumnWidth + Current->SpacesRequiredBefore;
4037 Current = Current->Previous;
4041 void TokenAnnotator::calculateArrayInitializerColumnList(
4042 AnnotatedLine &Line) const {
4043 if (Line.First == Line.Last)
4044 return;
4045 auto *CurrentToken = Line.First;
4046 CurrentToken->ArrayInitializerLineStart = true;
4047 unsigned Depth = 0;
4048 while (CurrentToken && CurrentToken != Line.Last) {
4049 if (CurrentToken->is(tok::l_brace)) {
4050 CurrentToken->IsArrayInitializer = true;
4051 if (CurrentToken->Next)
4052 CurrentToken->Next->MustBreakBefore = true;
4053 CurrentToken =
4054 calculateInitializerColumnList(Line, CurrentToken->Next, Depth + 1);
4055 } else {
4056 CurrentToken = CurrentToken->Next;
4061 FormatToken *TokenAnnotator::calculateInitializerColumnList(
4062 AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth) const {
4063 while (CurrentToken && CurrentToken != Line.Last) {
4064 if (CurrentToken->is(tok::l_brace))
4065 ++Depth;
4066 else if (CurrentToken->is(tok::r_brace))
4067 --Depth;
4068 if (Depth == 2 && CurrentToken->isOneOf(tok::l_brace, tok::comma)) {
4069 CurrentToken = CurrentToken->Next;
4070 if (!CurrentToken)
4071 break;
4072 CurrentToken->StartsColumn = true;
4073 CurrentToken = CurrentToken->Previous;
4075 CurrentToken = CurrentToken->Next;
4077 return CurrentToken;
// Returns the penalty for placing a line break between Tok.Previous (called
// Left below) and Tok (called Right). The rules are checked in order and the
// first one that matches determines the result, so their order matters.
// InFunctionDecl tells the rules whether the tokens are being treated as part
// of a function declaration. Judging by the comments in the body, smaller
// values mark breaks that are "cheap", i.e. preferred.
4080 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
4081 const FormatToken &Tok,
4082 bool InFunctionDecl) const {
4083 const FormatToken &Left = *Tok.Previous;
4084 const FormatToken &Right = Tok;
// Breaking after a ';' costs nothing.
4086 if (Left.is(tok::semi))
4087 return 0;
4089 // Language specific handling.
4090 if (Style.Language == FormatStyle::LK_Java) {
4091 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws))
4092 return 1;
4093 if (Right.is(Keywords.kw_implements))
4094 return 2;
4095 if (Left.is(tok::comma) && Left.NestingLevel == 0)
4096 return 3;
4097 } else if (Style.isJavaScript()) {
4098 if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma))
4099 return 100;
4100 if (Left.is(TT_JsTypeColon))
4101 return 35;
4102 if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
4103 (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
4104 return 100;
4106 // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()".
4107 if (Left.opensScope() && Right.closesScope())
4108 return 200;
4109 } else if (Style.Language == FormatStyle::LK_Proto) {
4110 if (Right.is(tok::l_square))
4111 return 1;
4112 if (Right.is(tok::period))
4113 return 500;
// Language-independent rules start here.
4116 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
4117 return 1;
4118 if (Right.is(tok::l_square)) {
4119 if (Left.is(tok::r_square))
4120 return 200;
4121 // Slightly prefer formatting local lambda definitions like functions.
4122 if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal))
4123 return 35;
4124 if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
4125 TT_ArrayInitializerLSquare,
4126 TT_DesignatedInitializerLSquare, TT_AttributeSquare)) {
4127 return 500;
4131 if (Left.is(tok::coloncolon))
4132 return Style.PenaltyBreakScopeResolution;
// Breaking before a declared name or `operator`, i.e. after the type.
4133 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
4134 Right.is(tok::kw_operator)) {
4135 if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
4136 return 3;
4137 if (Left.is(TT_StartOfName))
4138 return 110;
4139 if (InFunctionDecl && Right.NestingLevel == 0)
4140 return Style.PenaltyReturnTypeOnItsOwnLine;
4141 return 200;
4143 if (Right.is(TT_PointerOrReference))
4144 return 190;
4145 if (Right.is(TT_TrailingReturnArrow))
4146 return 110;
4147 if (Left.is(tok::equal) && Right.is(tok::l_brace))
4148 return 160;
4149 if (Left.is(TT_CastRParen))
4150 return 100;
4151 if (Left.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union))
4152 return 5000;
4153 if (Left.is(tok::comment))
4154 return 1000;
4156 if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon,
4157 TT_CtorInitializerColon)) {
4158 return 2;
4161 if (Right.isMemberAccess()) {
4162 // Breaking before the "./->" of a chained call/member access is reasonably
4163 // cheap, as formatting those with one call per line is generally
4164 // desirable. In particular, it should be cheaper to break before the call
4165 // than it is to break inside a call's parameters, which could lead to weird
4166 // "hanging" indents. The exception is the very last "./->" to support this
4167 // frequent pattern:
4169 // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc(
4170 // dddddddd);
4172 // which might otherwise be blown up onto many lines. Here, clang-format
4173 // won't produce "hanging" indents anyway as there is no other trailing
4174 // call.
4176 // Also apply a higher penalty if it is not a call, as that might lead to a
4177 // wrapping like:
4179 // aaaaaaa
4180 // .aaaaaaaaa.bbbbbbbb(cccccccc);
4181 return !Right.NextOperator || !Right.NextOperator->Previous->closesScope()
4182 ? 150
4183 : 35;
4186 if (Right.is(TT_TrailingAnnotation) &&
4187 (!Right.Next || Right.Next->isNot(tok::l_paren))) {
4188 // Moving trailing annotations to the next line is fine for ObjC method
4189 // declarations.
4190 if (Line.startsWith(TT_ObjCMethodSpecifier))
4191 return 10;
4192 // Generally, breaking before a trailing annotation is bad unless it is
4193 // function-like. It seems to be especially preferable to keep standard
4194 // annotations (i.e. "const", "final" and "override") on the same line.
4195 // Use a slightly higher penalty after ")" so that annotations like
4196 // "const override" are kept together.
4197 bool is_short_annotation = Right.TokenText.size() < 10;
4198 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
4201 // In for-loops, prefer breaking at ',' and ';'.
4202 if (Line.startsWith(tok::kw_for) && Left.is(tok::equal))
4203 return 4;
4205 // In Objective-C method expressions, prefer breaking before "param:" over
4206 // breaking after it.
4207 if (Right.is(TT_SelectorName))
4208 return 0;
4209 if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr))
4210 return Line.MightBeFunctionDecl ? 50 : 500;
4212 // In Objective-C type declarations, avoid breaking after the category's
4213 // open paren (we'll prefer breaking after the protocol list's opening
4214 // angle bracket, if present).
4215 if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous &&
4216 Left.Previous->isOneOf(tok::identifier, tok::greater)) {
4217 return 500;
// Breaks directly after an opening parenthesis. A non-zero
// PenaltyBreakOpenParenthesis in the style takes precedence over the
// built-in values below.
4220 if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0)
4221 return Style.PenaltyBreakOpenParenthesis;
4222 if (Left.is(tok::l_paren) && InFunctionDecl &&
4223 Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) {
4224 return 100;
4226 if (Left.is(tok::l_paren) && Left.Previous &&
4227 (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) ||
4228 Left.Previous->isIf())) {
4229 return 1000;
4231 if (Left.is(tok::equal) && InFunctionDecl)
4232 return 110;
4233 if (Right.is(tok::r_brace))
4234 return 1;
4235 if (Left.is(TT_TemplateOpener))
4236 return 100;
4237 if (Left.opensScope()) {
4238 // If we aren't aligning after opening parens/braces we can always break
4239 // here unless the style does not want us to place all arguments on the
4240 // next line.
4241 if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign &&
4242 (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) {
4243 return 0;
4245 if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle)
4246 return 19;
4247 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
4248 : 19;
4250 if (Left.is(TT_JavaAnnotation))
4251 return 50;
4253 if (Left.is(TT_UnaryOperator))
4254 return 60;
// Breaks around '+', ',' and '<<' next to label strings.
4255 if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous &&
4256 Left.Previous->isLabelString() &&
4257 (Left.NextOperator || Left.OperatorIndex != 0)) {
4258 return 50;
4260 if (Right.is(tok::plus) && Left.isLabelString() &&
4261 (Right.NextOperator || Right.OperatorIndex != 0)) {
4262 return 25;
4264 if (Left.is(tok::comma))
4265 return 1;
4266 if (Right.is(tok::lessless) && Left.isLabelString() &&
4267 (Right.NextOperator || Right.OperatorIndex != 1)) {
4268 return 25;
4270 if (Right.is(tok::lessless)) {
4271 // Breaking at a << is really cheap.
4272 if (Left.isNot(tok::r_paren) || Right.OperatorIndex > 0) {
4273 // Slightly prefer to break before the first one in log-like statements.
4274 return 2;
4276 return 1;
4278 if (Left.ClosesTemplateDeclaration)
4279 return Style.PenaltyBreakTemplateDeclaration;
4280 if (Left.ClosesRequiresClause)
4281 return 0;
4282 if (Left.is(TT_ConditionalExpr))
4283 return prec::Conditional;
// Fall back to operator precedence: Left's precedence if known, otherwise
// Right's. Assignments use the style's dedicated penalty; any other known
// precedence level is returned as the penalty itself.
4284 prec::Level Level = Left.getPrecedence();
4285 if (Level == prec::Unknown)
4286 Level = Right.getPrecedence();
4287 if (Level == prec::Assignment)
4288 return Style.PenaltyBreakAssignment;
4289 if (Level != prec::Unknown)
4290 return Level;
// Default penalty when no rule above applied.
4292 return 3;
4295 bool TokenAnnotator::spaceRequiredBeforeParens(const FormatToken &Right) const {
4296 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Always)
4297 return true;
4298 if (Right.is(TT_OverloadedOperatorLParen) &&
4299 Style.SpaceBeforeParensOptions.AfterOverloadedOperator) {
4300 return true;
4302 if (Style.SpaceBeforeParensOptions.BeforeNonEmptyParentheses &&
4303 Right.ParameterCount > 0) {
4304 return true;
4306 return false;
4309 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
4310 const FormatToken &Left,
4311 const FormatToken &Right) const {
4312 if (Left.is(tok::kw_return) &&
4313 !Right.isOneOf(tok::semi, tok::r_paren, tok::hashhash)) {
4314 return true;
4316 if (Left.is(tok::kw_throw) && Right.is(tok::l_paren) && Right.MatchingParen &&
4317 Right.MatchingParen->is(TT_CastRParen)) {
4318 return true;
4320 if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java)
4321 return true;
4322 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
4323 Left.Tok.getObjCKeywordID() == tok::objc_property) {
4324 return true;
4326 if (Right.is(tok::hashhash))
4327 return Left.is(tok::hash);
4328 if (Left.isOneOf(tok::hashhash, tok::hash))
4329 return Right.is(tok::hash);
4330 if (Left.is(BK_Block) && Right.is(tok::r_brace) &&
4331 Right.MatchingParen == &Left && Line.Children.empty()) {
4332 return Style.SpaceInEmptyBlock;
4334 if ((Left.is(tok::l_paren) && Right.is(tok::r_paren)) ||
4335 (Left.is(tok::l_brace) && Left.isNot(BK_Block) &&
4336 Right.is(tok::r_brace) && Right.isNot(BK_Block))) {
4337 return Style.SpacesInParensOptions.InEmptyParentheses;
4339 if (Style.SpacesInParensOptions.InConditionalStatements) {
4340 const FormatToken *LeftParen = nullptr;
4341 if (Left.is(tok::l_paren))
4342 LeftParen = &Left;
4343 else if (Right.is(tok::r_paren) && Right.MatchingParen)
4344 LeftParen = Right.MatchingParen;
4345 if (LeftParen) {
4346 if (LeftParen->is(TT_ConditionLParen))
4347 return true;
4348 if (LeftParen->Previous && isKeywordWithCondition(*LeftParen->Previous))
4349 return true;
4353 // trailing return type 'auto': []() -> auto {}, auto foo() -> auto {}
4354 if (Left.is(tok::kw_auto) && Right.isOneOf(TT_LambdaLBrace, TT_FunctionLBrace,
4355 // function return type 'auto'
4356 TT_FunctionTypeLParen)) {
4357 return true;
4360 // auto{x} auto(x)
4361 if (Left.is(tok::kw_auto) && Right.isOneOf(tok::l_paren, tok::l_brace))
4362 return false;
4364 const auto *BeforeLeft = Left.Previous;
4366 // operator co_await(x)
4367 if (Right.is(tok::l_paren) && Left.is(tok::kw_co_await) && BeforeLeft &&
4368 BeforeLeft->is(tok::kw_operator)) {
4369 return false;
4371 // co_await (x), co_yield (x), co_return (x)
4372 if (Left.isOneOf(tok::kw_co_await, tok::kw_co_yield, tok::kw_co_return) &&
4373 !Right.isOneOf(tok::semi, tok::r_paren)) {
4374 return true;
4377 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) {
4378 return (Right.is(TT_CastRParen) ||
4379 (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen)))
4380 ? Style.SpacesInParensOptions.InCStyleCasts
4381 : Style.SpacesInParensOptions.Other;
4383 if (Right.isOneOf(tok::semi, tok::comma))
4384 return false;
4385 if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) {
4386 bool IsLightweightGeneric = Right.MatchingParen &&
4387 Right.MatchingParen->Next &&
4388 Right.MatchingParen->Next->is(tok::colon);
4389 return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList;
4391 if (Right.is(tok::less) && Left.is(tok::kw_template))
4392 return Style.SpaceAfterTemplateKeyword;
4393 if (Left.isOneOf(tok::exclaim, tok::tilde))
4394 return false;
4395 if (Left.is(tok::at) &&
4396 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
4397 tok::numeric_constant, tok::l_paren, tok::l_brace,
4398 tok::kw_true, tok::kw_false)) {
4399 return false;
4401 if (Left.is(tok::colon))
4402 return Left.isNot(TT_ObjCMethodExpr);
4403 if (Left.is(tok::coloncolon)) {
4404 return Right.is(tok::star) && Right.is(TT_PointerOrReference) &&
4405 Style.PointerAlignment != FormatStyle::PAS_Left;
4407 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) {
4408 if (Style.Language == FormatStyle::LK_TextProto ||
4409 (Style.Language == FormatStyle::LK_Proto &&
4410 (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) {
4411 // Format empty list as `<>`.
4412 if (Left.is(tok::less) && Right.is(tok::greater))
4413 return false;
4414 return !Style.Cpp11BracedListStyle;
4416 // Don't attempt to format operator<(), as it is handled later.
4417 if (Right.isNot(TT_OverloadedOperatorLParen))
4418 return false;
4420 if (Right.is(tok::ellipsis)) {
4421 return Left.Tok.isLiteral() || (Left.is(tok::identifier) && BeforeLeft &&
4422 BeforeLeft->is(tok::kw_case));
4424 if (Left.is(tok::l_square) && Right.is(tok::amp))
4425 return Style.SpacesInSquareBrackets;
4426 if (Right.is(TT_PointerOrReference)) {
4427 if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) {
4428 if (!Left.MatchingParen)
4429 return true;
4430 FormatToken *TokenBeforeMatchingParen =
4431 Left.MatchingParen->getPreviousNonComment();
4432 if (!TokenBeforeMatchingParen || Left.isNot(TT_TypeDeclarationParen))
4433 return true;
4435 // Add a space if the previous token is a pointer qualifier or the closing
4436 // parenthesis of __attribute__(()) expression and the style requires spaces
4437 // after pointer qualifiers.
4438 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_After ||
4439 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4440 (Left.is(TT_AttributeRParen) ||
4441 Left.canBePointerOrReferenceQualifier())) {
4442 return true;
4444 if (Left.Tok.isLiteral())
4445 return true;
4446 // for (auto a = 0, b = 0; const auto & c : {1, 2, 3})
4447 if (Left.isTypeOrIdentifier(IsCpp) && Right.Next && Right.Next->Next &&
4448 Right.Next->Next->is(TT_RangeBasedForLoopColon)) {
4449 return getTokenPointerOrReferenceAlignment(Right) !=
4450 FormatStyle::PAS_Left;
4452 return !Left.isOneOf(TT_PointerOrReference, tok::l_paren) &&
4453 (getTokenPointerOrReferenceAlignment(Right) !=
4454 FormatStyle::PAS_Left ||
4455 (Line.IsMultiVariableDeclStmt &&
4456 (Left.NestingLevel == 0 ||
4457 (Left.NestingLevel == 1 && startsWithInitStatement(Line)))));
4459 if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) &&
4460 (Left.isNot(TT_PointerOrReference) ||
4461 (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right &&
4462 !Line.IsMultiVariableDeclStmt))) {
4463 return true;
4465 if (Left.is(TT_PointerOrReference)) {
4466 // Add a space if the next token is a pointer qualifier and the style
4467 // requires spaces before pointer qualifiers.
4468 if ((Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Before ||
4469 Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) &&
4470 Right.canBePointerOrReferenceQualifier()) {
4471 return true;
4473 // & 1
4474 if (Right.Tok.isLiteral())
4475 return true;
4476 // & /* comment
4477 if (Right.is(TT_BlockComment))
4478 return true;
4479 // foo() -> const Bar * override/final
4480 // S::foo() & noexcept/requires
4481 if (Right.isOneOf(Keywords.kw_override, Keywords.kw_final, tok::kw_noexcept,
4482 TT_RequiresClause) &&
4483 Right.isNot(TT_StartOfName)) {
4484 return true;
4486 // & {
4487 if (Right.is(tok::l_brace) && Right.is(BK_Block))
4488 return true;
4489 // for (auto a = 0, b = 0; const auto& c : {1, 2, 3})
4490 if (BeforeLeft && BeforeLeft->isTypeOrIdentifier(IsCpp) && Right.Next &&
4491 Right.Next->is(TT_RangeBasedForLoopColon)) {
4492 return getTokenPointerOrReferenceAlignment(Left) !=
4493 FormatStyle::PAS_Right;
4495 if (Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare,
4496 tok::l_paren)) {
4497 return false;
4499 if (getTokenPointerOrReferenceAlignment(Left) == FormatStyle::PAS_Right)
4500 return false;
4501 // FIXME: Setting IsMultiVariableDeclStmt for the whole line is error-prone,
4502 // because it does not take into account nested scopes like lambdas.
4503 // In multi-variable declaration statements, attach */& to the variable
4504 // independently of the style. However, avoid doing it if we are in a nested
4505 // scope, e.g. lambda. We still need to special-case statements with
4506 // initializers.
4507 if (Line.IsMultiVariableDeclStmt &&
4508 (Left.NestingLevel == Line.First->NestingLevel ||
4509 ((Left.NestingLevel == Line.First->NestingLevel + 1) &&
4510 startsWithInitStatement(Line)))) {
4511 return false;
4513 if (!BeforeLeft)
4514 return false;
4515 if (BeforeLeft->is(tok::coloncolon)) {
4516 return Left.is(tok::star) &&
4517 Style.PointerAlignment != FormatStyle::PAS_Right;
4519 return !BeforeLeft->isOneOf(tok::l_paren, tok::l_square);
4521 // Ensure right pointer alignment with ellipsis e.g. int *...P
4522 if (Left.is(tok::ellipsis) && BeforeLeft &&
4523 BeforeLeft->isPointerOrReference()) {
4524 return Style.PointerAlignment != FormatStyle::PAS_Right;
4527 if (Right.is(tok::star) && Left.is(tok::l_paren))
4528 return false;
4529 if (Left.is(tok::star) && Right.isPointerOrReference())
4530 return false;
4531 if (Right.isPointerOrReference()) {
4532 const FormatToken *Previous = &Left;
4533 while (Previous && Previous->isNot(tok::kw_operator)) {
4534 if (Previous->is(tok::identifier) || Previous->isTypeName(IsCpp)) {
4535 Previous = Previous->getPreviousNonComment();
4536 continue;
4538 if (Previous->is(TT_TemplateCloser) && Previous->MatchingParen) {
4539 Previous = Previous->MatchingParen->getPreviousNonComment();
4540 continue;
4542 if (Previous->is(tok::coloncolon)) {
4543 Previous = Previous->getPreviousNonComment();
4544 continue;
4546 break;
4548 // Space between the type and the * in:
4549 // operator void*()
4550 // operator char*()
4551 // operator void const*()
4552 // operator void volatile*()
4553 // operator /*comment*/ const char*()
4554 // operator volatile /*comment*/ char*()
4555 // operator Foo*()
4556 // operator C<T>*()
4557 // operator std::Foo*()
4558 // operator C<T>::D<U>*()
4559 // dependent on PointerAlignment style.
4560 if (Previous) {
4561 if (Previous->endsSequence(tok::kw_operator))
4562 return Style.PointerAlignment != FormatStyle::PAS_Left;
4563 if (Previous->is(tok::kw_const) || Previous->is(tok::kw_volatile)) {
4564 return (Style.PointerAlignment != FormatStyle::PAS_Left) ||
4565 (Style.SpaceAroundPointerQualifiers ==
4566 FormatStyle::SAPQ_After) ||
4567 (Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both);
4571 if (Style.isCSharp() && Left.is(Keywords.kw_is) && Right.is(tok::l_square))
4572 return true;
4573 const auto SpaceRequiredForArrayInitializerLSquare =
4574 [](const FormatToken &LSquareTok, const FormatStyle &Style) {
4575 return Style.SpacesInContainerLiterals ||
4576 (Style.isProto() && !Style.Cpp11BracedListStyle &&
4577 LSquareTok.endsSequence(tok::l_square, tok::colon,
4578 TT_SelectorName));
4580 if (Left.is(tok::l_square)) {
4581 return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) &&
4582 SpaceRequiredForArrayInitializerLSquare(Left, Style)) ||
4583 (Left.isOneOf(TT_ArraySubscriptLSquare, TT_StructuredBindingLSquare,
4584 TT_LambdaLSquare) &&
4585 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
4587 if (Right.is(tok::r_square)) {
4588 return Right.MatchingParen &&
4589 ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) &&
4590 SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen,
4591 Style)) ||
4592 (Style.SpacesInSquareBrackets &&
4593 Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare,
4594 TT_StructuredBindingLSquare,
4595 TT_LambdaLSquare)));
4597 if (Right.is(tok::l_square) &&
4598 !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare,
4599 TT_DesignatedInitializerLSquare,
4600 TT_StructuredBindingLSquare, TT_AttributeSquare) &&
4601 !Left.isOneOf(tok::numeric_constant, TT_DictLiteral) &&
4602 !(Left.isNot(tok::r_square) && Style.SpaceBeforeSquareBrackets &&
4603 Right.is(TT_ArraySubscriptLSquare))) {
4604 return false;
4606 if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
4607 return !Left.Children.empty(); // No spaces in "{}".
4608 if ((Left.is(tok::l_brace) && Left.isNot(BK_Block)) ||
4609 (Right.is(tok::r_brace) && Right.MatchingParen &&
4610 Right.MatchingParen->isNot(BK_Block))) {
4611 return !Style.Cpp11BracedListStyle || Style.SpacesInParensOptions.Other;
4613 if (Left.is(TT_BlockComment)) {
4614 // No whitespace in x(/*foo=*/1), except for JavaScript.
4615 return Style.isJavaScript() || !Left.TokenText.ends_with("=*/");
4618 // Space between template and attribute.
4619 // e.g. template <typename T> [[nodiscard]] ...
4620 if (Left.is(TT_TemplateCloser) && Right.is(TT_AttributeSquare))
4621 return true;
4622 // Space before parentheses common for all languages
4623 if (Right.is(tok::l_paren)) {
4624 if (Left.is(TT_TemplateCloser) && Right.isNot(TT_FunctionTypeLParen))
4625 return spaceRequiredBeforeParens(Right);
4626 if (Left.isOneOf(TT_RequiresClause,
4627 TT_RequiresClauseInARequiresExpression)) {
4628 return Style.SpaceBeforeParensOptions.AfterRequiresInClause ||
4629 spaceRequiredBeforeParens(Right);
4631 if (Left.is(TT_RequiresExpression)) {
4632 return Style.SpaceBeforeParensOptions.AfterRequiresInExpression ||
4633 spaceRequiredBeforeParens(Right);
4635 if (Left.is(TT_AttributeRParen) ||
4636 (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) {
4637 return true;
4639 if (Left.is(TT_ForEachMacro)) {
4640 return Style.SpaceBeforeParensOptions.AfterForeachMacros ||
4641 spaceRequiredBeforeParens(Right);
4643 if (Left.is(TT_IfMacro)) {
4644 return Style.SpaceBeforeParensOptions.AfterIfMacros ||
4645 spaceRequiredBeforeParens(Right);
4647 if (Style.SpaceBeforeParens == FormatStyle::SBPO_Custom &&
4648 Left.isOneOf(tok::kw_new, tok::kw_delete) &&
4649 Right.isNot(TT_OverloadedOperatorLParen) &&
4650 !(Line.MightBeFunctionDecl && Left.is(TT_FunctionDeclarationName))) {
4651 return Style.SpaceBeforeParensOptions.AfterPlacementOperator;
4653 if (Line.Type == LT_ObjCDecl)
4654 return true;
4655 if (Left.is(tok::semi))
4656 return true;
4657 if (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, tok::kw_switch,
4658 tok::kw_case, TT_ForEachMacro, TT_ObjCForIn) ||
4659 Left.isIf(Line.Type != LT_PreprocessorDirective) ||
4660 Right.is(TT_ConditionLParen)) {
4661 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4662 spaceRequiredBeforeParens(Right);
4665 // TODO add Operator overloading specific Options to
4666 // SpaceBeforeParensOptions
4667 if (Right.is(TT_OverloadedOperatorLParen))
4668 return spaceRequiredBeforeParens(Right);
4669 // Function declaration or definition
4670 if (Line.MightBeFunctionDecl && (Left.is(TT_FunctionDeclarationName))) {
4671 if (Line.mightBeFunctionDefinition()) {
4672 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4673 spaceRequiredBeforeParens(Right);
4674 } else {
4675 return Style.SpaceBeforeParensOptions.AfterFunctionDeclarationName ||
4676 spaceRequiredBeforeParens(Right);
4679 // Lambda
4680 if (Line.Type != LT_PreprocessorDirective && Left.is(tok::r_square) &&
4681 Left.MatchingParen && Left.MatchingParen->is(TT_LambdaLSquare)) {
4682 return Style.SpaceBeforeParensOptions.AfterFunctionDefinitionName ||
4683 spaceRequiredBeforeParens(Right);
4685 if (!BeforeLeft || !BeforeLeft->isOneOf(tok::period, tok::arrow)) {
4686 if (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch)) {
4687 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4688 spaceRequiredBeforeParens(Right);
4690 if (Left.isOneOf(tok::kw_new, tok::kw_delete)) {
4691 return ((!Line.MightBeFunctionDecl || !BeforeLeft) &&
4692 Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4693 spaceRequiredBeforeParens(Right);
4696 if (Left.is(tok::r_square) && Left.MatchingParen &&
4697 Left.MatchingParen->Previous &&
4698 Left.MatchingParen->Previous->is(tok::kw_delete)) {
4699 return (Style.SpaceBeforeParens != FormatStyle::SBPO_Never) ||
4700 spaceRequiredBeforeParens(Right);
4703 // Handle builtins like identifiers.
4704 if (Line.Type != LT_PreprocessorDirective &&
4705 (Left.Tok.getIdentifierInfo() || Left.is(tok::r_paren))) {
4706 return spaceRequiredBeforeParens(Right);
4708 return false;
4710 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
4711 return false;
4712 if (Right.is(TT_UnaryOperator)) {
4713 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
4714 (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr));
4716 // No space between the variable name and the initializer list.
4717 // A a1{1};
4718 // Verilog doesn't have such syntax, but it has word operators that are C++
4719 // identifiers like `a inside {b, c}`. So the rule is not applicable.
4720 if (!Style.isVerilog() &&
4721 (Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
4722 tok::r_paren) ||
4723 Left.isTypeName(IsCpp)) &&
4724 Right.is(tok::l_brace) && Right.getNextNonComment() &&
4725 Right.isNot(BK_Block)) {
4726 return false;
4728 if (Left.is(tok::period) || Right.is(tok::period))
4729 return false;
4730 // u#str, U#str, L#str, u8#str
4731 // uR#str, UR#str, LR#str, u8R#str
4732 if (Right.is(tok::hash) && Left.is(tok::identifier) &&
4733 (Left.TokenText == "L" || Left.TokenText == "u" ||
4734 Left.TokenText == "U" || Left.TokenText == "u8" ||
4735 Left.TokenText == "LR" || Left.TokenText == "uR" ||
4736 Left.TokenText == "UR" || Left.TokenText == "u8R")) {
4737 return false;
4739 if (Left.is(TT_TemplateCloser) && Left.MatchingParen &&
4740 Left.MatchingParen->Previous &&
4741 (Left.MatchingParen->Previous->is(tok::period) ||
4742 Left.MatchingParen->Previous->is(tok::coloncolon))) {
4743 // Java call to generic function with explicit type:
4744 // A.<B<C<...>>>DoSomething();
4745 // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference.
4746 return false;
4748 if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square))
4749 return false;
4750 if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) {
4751 // Objective-C dictionary literal -> no space after opening brace.
4752 return false;
4754 if (Right.is(tok::r_brace) && Right.MatchingParen &&
4755 Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) {
4756 // Objective-C dictionary literal -> no space before closing brace.
4757 return false;
4759 if (Right.is(TT_TrailingAnnotation) && Right.isOneOf(tok::amp, tok::ampamp) &&
4760 Left.isOneOf(tok::kw_const, tok::kw_volatile) &&
4761 (!Right.Next || Right.Next->is(tok::semi))) {
4762 // Match const and volatile ref-qualifiers without any additional
4763 // qualifiers such as
4764 // void Fn() const &;
4765 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
4768 return true;
4771 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
4772 const FormatToken &Right) const {
4773 const FormatToken &Left = *Right.Previous;
4775 // If the token is finalized don't touch it (as it could be in a
4776 // clang-format-off section).
4777 if (Left.Finalized)
4778 return Right.hasWhitespaceBefore();
4780 // Never ever merge two words.
4781 if (Keywords.isWordLike(Right) && Keywords.isWordLike(Left))
4782 return true;
4784 // Leave a space between * and /* to avoid C4138 `comment end` found outside
4785 // of comment.
4786 if (Left.is(tok::star) && Right.is(tok::comment))
4787 return true;
4789 if (IsCpp) {
4790 if (Left.is(TT_OverloadedOperator) &&
4791 Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) {
4792 return true;
4794 // Space between UDL and dot: auto b = 4s .count();
4795 if (Right.is(tok::period) && Left.is(tok::numeric_constant))
4796 return true;
4797 // Space between import <iostream>.
4798 // or import .....;
4799 if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
4800 return true;
4801 // Space between `module :` and `import :`.
4802 if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
4803 Right.is(TT_ModulePartitionColon)) {
4804 return true;
4806 // No space between import foo:bar but keep a space between import :bar;
4807 if (Left.is(tok::identifier) && Right.is(TT_ModulePartitionColon))
4808 return false;
4809 // No space between :bar;
4810 if (Left.is(TT_ModulePartitionColon) &&
4811 Right.isOneOf(tok::identifier, tok::kw_private)) {
4812 return false;
4814 if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
4815 Line.First->is(Keywords.kw_import)) {
4816 return false;
4818 // Space in __attribute__((attr)) ::type.
4819 if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
4820 Right.is(tok::coloncolon)) {
4821 return true;
4824 if (Left.is(tok::kw_operator))
4825 return Right.is(tok::coloncolon);
4826 if (Right.is(tok::l_brace) && Right.is(BK_BracedInit) &&
4827 !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) {
4828 return true;
4830 if (Left.is(tok::less) && Left.is(TT_OverloadedOperator) &&
4831 Right.is(TT_TemplateOpener)) {
4832 return true;
4834 if (Left.is(tok::identifier) && Right.is(tok::numeric_constant) &&
4835 Right.TokenText[0] == '.') {
4836 return false;
4838 } else if (Style.isProto()) {
4839 if (Right.is(tok::period) &&
4840 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
4841 Keywords.kw_repeated, Keywords.kw_extend)) {
4842 return true;
4844 if (Right.is(tok::l_paren) &&
4845 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) {
4846 return true;
4848 if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName))
4849 return true;
4850 // Slashes occur in text protocol extension syntax: [type/type] { ... }.
4851 if (Left.is(tok::slash) || Right.is(tok::slash))
4852 return false;
4853 if (Left.MatchingParen &&
4854 Left.MatchingParen->is(TT_ProtoExtensionLSquare) &&
4855 Right.isOneOf(tok::l_brace, tok::less)) {
4856 return !Style.Cpp11BracedListStyle;
4858 // A percent is probably part of a formatting specification, such as %lld.
4859 if (Left.is(tok::percent))
4860 return false;
4861 // Preserve the existence of a space before a percent for cases like 0x%04x
4862 // and "%d %d"
4863 if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
4864 return Right.hasWhitespaceBefore();
4865 } else if (Style.isJson()) {
4866 if (Right.is(tok::colon) && Left.is(tok::string_literal))
4867 return Style.SpaceBeforeJsonColon;
4868 } else if (Style.isCSharp()) {
4869 // Require spaces around '{' and before '}' unless they appear in
4870 // interpolated strings. Interpolated strings are merged into a single token
4871 // so cannot have spaces inserted by this function.
4873 // No space between 'this' and '['
4874 if (Left.is(tok::kw_this) && Right.is(tok::l_square))
4875 return false;
4877 // No space between 'new' and '('
4878 if (Left.is(tok::kw_new) && Right.is(tok::l_paren))
4879 return false;
4881 // Space before { (including space within '{ {').
4882 if (Right.is(tok::l_brace))
4883 return true;
4885 // Spaces inside braces.
4886 if (Left.is(tok::l_brace) && Right.isNot(tok::r_brace))
4887 return true;
4889 if (Left.isNot(tok::l_brace) && Right.is(tok::r_brace))
4890 return true;
4892 // Spaces around '=>'.
4893 if (Left.is(TT_FatArrow) || Right.is(TT_FatArrow))
4894 return true;
4896 // No spaces around attribute target colons
4897 if (Left.is(TT_AttributeColon) || Right.is(TT_AttributeColon))
4898 return false;
4900 // space between type and variable e.g. Dictionary<string,string> foo;
4901 if (Left.is(TT_TemplateCloser) && Right.is(TT_StartOfName))
4902 return true;
4904 // spaces inside square brackets.
4905 if (Left.is(tok::l_square) || Right.is(tok::r_square))
4906 return Style.SpacesInSquareBrackets;
4908 // No space before ? in nullable types.
4909 if (Right.is(TT_CSharpNullable))
4910 return false;
4912 // No space before null forgiving '!'.
4913 if (Right.is(TT_NonNullAssertion))
4914 return false;
4916 // No space between consecutive commas '[,,]'.
4917 if (Left.is(tok::comma) && Right.is(tok::comma))
4918 return false;
4920 // space after var in `var (key, value)`
4921 if (Left.is(Keywords.kw_var) && Right.is(tok::l_paren))
4922 return true;
4924 // space between keywords and paren e.g. "using ("
4925 if (Right.is(tok::l_paren)) {
4926 if (Left.isOneOf(tok::kw_using, Keywords.kw_async, Keywords.kw_when,
4927 Keywords.kw_lock)) {
4928 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
4929 spaceRequiredBeforeParens(Right);
4933 // space between method modifier and opening parenthesis of a tuple return
4934 // type
4935 if (Left.isOneOf(tok::kw_public, tok::kw_private, tok::kw_protected,
4936 tok::kw_virtual, tok::kw_extern, tok::kw_static,
4937 Keywords.kw_internal, Keywords.kw_abstract,
4938 Keywords.kw_sealed, Keywords.kw_override,
4939 Keywords.kw_async, Keywords.kw_unsafe) &&
4940 Right.is(tok::l_paren)) {
4941 return true;
4943 } else if (Style.isJavaScript()) {
4944 if (Left.is(TT_FatArrow))
4945 return true;
4946 // for await ( ...
4947 if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous &&
4948 Left.Previous->is(tok::kw_for)) {
4949 return true;
4951 if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) &&
4952 Right.MatchingParen) {
4953 const FormatToken *Next = Right.MatchingParen->getNextNonComment();
4954 // An async arrow function, for example: `x = async () => foo();`,
4955 // as opposed to calling a function called async: `x = async();`
4956 if (Next && Next->is(TT_FatArrow))
4957 return true;
4959 if ((Left.is(TT_TemplateString) && Left.TokenText.ends_with("${")) ||
4960 (Right.is(TT_TemplateString) && Right.TokenText.starts_with("}"))) {
4961 return false;
4963 // In tagged template literals ("html`bar baz`"), there is no space between
4964 // the tag identifier and the template string.
4965 if (Keywords.IsJavaScriptIdentifier(Left,
4966 /* AcceptIdentifierName= */ false) &&
4967 Right.is(TT_TemplateString)) {
4968 return false;
4970 if (Right.is(tok::star) &&
4971 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) {
4972 return false;
4974 if (Right.isOneOf(tok::l_brace, tok::l_square) &&
4975 Left.isOneOf(Keywords.kw_function, Keywords.kw_yield,
4976 Keywords.kw_extends, Keywords.kw_implements)) {
4977 return true;
4979 if (Right.is(tok::l_paren)) {
4980 // JS methods can use some keywords as names (e.g. `delete()`).
4981 if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo())
4982 return false;
4983 // Valid JS method names can include keywords, e.g. `foo.delete()` or
4984 // `bar.instanceof()`. Recognize call positions by preceding period.
4985 if (Left.Previous && Left.Previous->is(tok::period) &&
4986 Left.Tok.getIdentifierInfo()) {
4987 return false;
4989 // Additional unary JavaScript operators that need a space after.
4990 if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof,
4991 tok::kw_void)) {
4992 return true;
4995 // `foo as const;` casts into a const type.
4996 if (Left.endsSequence(tok::kw_const, Keywords.kw_as))
4997 return false;
4998 if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in,
4999 tok::kw_const) ||
5000 // "of" is only a keyword if it appears after another identifier
5001 // (e.g. as "const x of y" in a for loop), or after a destructuring
5002 // operation (const [x, y] of z, const {a, b} of c).
5003 (Left.is(Keywords.kw_of) && Left.Previous &&
5004 (Left.Previous->is(tok::identifier) ||
5005 Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) &&
5006 (!Left.Previous || Left.Previous->isNot(tok::period))) {
5007 return true;
5009 if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous &&
5010 Left.Previous->is(tok::period) && Right.is(tok::l_paren)) {
5011 return false;
5013 if (Left.is(Keywords.kw_as) &&
5014 Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) {
5015 return true;
5017 if (Left.is(tok::kw_default) && Left.Previous &&
5018 Left.Previous->is(tok::kw_export)) {
5019 return true;
5021 if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace))
5022 return true;
5023 if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion))
5024 return false;
5025 if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator))
5026 return false;
5027 if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) &&
5028 Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) {
5029 return false;
5031 if (Left.is(tok::ellipsis))
5032 return false;
5033 if (Left.is(TT_TemplateCloser) &&
5034 !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square,
5035 Keywords.kw_implements, Keywords.kw_extends)) {
5036 // Type assertions ('<type>expr') are not followed by whitespace. Other
5037 // locations that should have whitespace following are identified by the
5038 // above set of follower tokens.
5039 return false;
5041 if (Right.is(TT_NonNullAssertion))
5042 return false;
5043 if (Left.is(TT_NonNullAssertion) &&
5044 Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) {
5045 return true; // "x! as string", "x! in y"
5047 } else if (Style.Language == FormatStyle::LK_Java) {
5048 if (Left.is(tok::r_square) && Right.is(tok::l_brace))
5049 return true;
5050 // spaces inside square brackets.
5051 if (Left.is(tok::l_square) || Right.is(tok::r_square))
5052 return Style.SpacesInSquareBrackets;
5054 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) {
5055 return Style.SpaceBeforeParensOptions.AfterControlStatements ||
5056 spaceRequiredBeforeParens(Right);
5058 if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
5059 tok::kw_protected) ||
5060 Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract,
5061 Keywords.kw_native)) &&
5062 Right.is(TT_TemplateOpener)) {
5063 return true;
5065 } else if (Style.isVerilog()) {
5066 // An escaped identifier ends with whitespace.
5067 if (Style.isVerilog() && Left.is(tok::identifier) &&
5068 Left.TokenText[0] == '\\') {
5069 return true;
5071 // Add space between things in a primitive's state table unless in a
5072 // transition like `(0?)`.
5073 if ((Left.is(TT_VerilogTableItem) &&
5074 !Right.isOneOf(tok::r_paren, tok::semi)) ||
5075 (Right.is(TT_VerilogTableItem) && Left.isNot(tok::l_paren))) {
5076 const FormatToken *Next = Right.getNextNonComment();
5077 return !(Next && Next->is(tok::r_paren));
5079 // Don't add space within a delay like `#0`.
5080 if (Left.isNot(TT_BinaryOperator) &&
5081 Left.isOneOf(Keywords.kw_verilogHash, Keywords.kw_verilogHashHash)) {
5082 return false;
5084 // Add space after a delay.
5085 if (Right.isNot(tok::semi) &&
5086 (Left.endsSequence(tok::numeric_constant, Keywords.kw_verilogHash) ||
5087 Left.endsSequence(tok::numeric_constant,
5088 Keywords.kw_verilogHashHash) ||
5089 (Left.is(tok::r_paren) && Left.MatchingParen &&
5090 Left.MatchingParen->endsSequence(tok::l_paren, tok::at)))) {
5091 return true;
5093 // Don't add embedded spaces in a number literal like `16'h1?ax` or an array
5094 // literal like `'{}`.
5095 if (Left.is(Keywords.kw_apostrophe) ||
5096 (Left.is(TT_VerilogNumberBase) && Right.is(tok::numeric_constant))) {
5097 return false;
5099 // Add spaces around the implication operator `->`.
5100 if (Left.is(tok::arrow) || Right.is(tok::arrow))
5101 return true;
5102 // Don't add spaces between two at signs. Like in a coverage event.
5103 // Don't add spaces between at and a sensitivity list like
5104 // `@(posedge clk)`.
5105 if (Left.is(tok::at) && Right.isOneOf(tok::l_paren, tok::star, tok::at))
5106 return false;
5107 // Add space between the type name and dimension like `logic [1:0]`.
5108 if (Right.is(tok::l_square) &&
5109 Left.isOneOf(TT_VerilogDimensionedTypeName, Keywords.kw_function)) {
5110 return true;
5112 // In a tagged union expression, there should be a space after the tag.
5113 if (Right.isOneOf(tok::period, Keywords.kw_apostrophe) &&
5114 Keywords.isVerilogIdentifier(Left) && Left.getPreviousNonComment() &&
5115 Left.getPreviousNonComment()->is(Keywords.kw_tagged)) {
5116 return true;
5118 // Don't add spaces between a casting type and the quote or repetition count
5119 // and the brace. The case of tagged union expressions is handled by the
5120 // previous rule.
5121 if ((Right.is(Keywords.kw_apostrophe) ||
5122 (Right.is(BK_BracedInit) && Right.is(tok::l_brace))) &&
5123 !(Left.isOneOf(Keywords.kw_assign, Keywords.kw_unique) ||
5124 Keywords.isVerilogWordOperator(Left)) &&
5125 (Left.isOneOf(tok::r_square, tok::r_paren, tok::r_brace,
5126 tok::numeric_constant) ||
5127 Keywords.isWordLike(Left))) {
5128 return false;
5130 // Don't add spaces in imports like `import foo::*;`.
5131 if ((Right.is(tok::star) && Left.is(tok::coloncolon)) ||
5132 (Left.is(tok::star) && Right.is(tok::semi))) {
5133 return false;
5135 // Add space in attribute like `(* ASYNC_REG = "TRUE" *)`.
5136 if (Left.endsSequence(tok::star, tok::l_paren) && Right.is(tok::identifier))
5137 return true;
5138 // Add space before drive strength like in `wire (strong1, pull0)`.
5139 if (Right.is(tok::l_paren) && Right.is(TT_VerilogStrength))
5140 return true;
5141 // Don't add space in a streaming concatenation like `{>>{j}}`.
5142 if ((Left.is(tok::l_brace) &&
5143 Right.isOneOf(tok::lessless, tok::greatergreater)) ||
5144 (Left.endsSequence(tok::lessless, tok::l_brace) ||
5145 Left.endsSequence(tok::greatergreater, tok::l_brace))) {
5146 return false;
5148 } else if (Style.isTableGen()) {
5149 // Avoid to connect [ and {. [{ is start token of multiline string.
5150 if (Left.is(tok::l_square) && Right.is(tok::l_brace))
5151 return true;
5152 if (Left.is(tok::r_brace) && Right.is(tok::r_square))
5153 return true;
5154 // Do not insert around colon in DAGArg and cond operator.
5155 if (Right.isOneOf(TT_TableGenDAGArgListColon,
5156 TT_TableGenDAGArgListColonToAlign) ||
5157 Left.isOneOf(TT_TableGenDAGArgListColon,
5158 TT_TableGenDAGArgListColonToAlign)) {
5159 return false;
5161 if (Right.is(TT_TableGenCondOperatorColon))
5162 return false;
5163 if (Left.isOneOf(TT_TableGenDAGArgOperatorID,
5164 TT_TableGenDAGArgOperatorToBreak) &&
5165 Right.isNot(TT_TableGenDAGArgCloser)) {
5166 return true;
5168 // Do not insert bang operators and consequent openers.
5169 if (Right.isOneOf(tok::l_paren, tok::less) &&
5170 Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator)) {
5171 return false;
5173 // Trailing paste requires space before '{' or ':', the case in name values.
5174 // Not before ';', the case in normal values.
5175 if (Left.is(TT_TableGenTrailingPasteOperator) &&
5176 Right.isOneOf(tok::l_brace, tok::colon)) {
5177 return true;
5179 // Otherwise paste operator does not prefer space around.
5180 if (Left.is(tok::hash) || Right.is(tok::hash))
5181 return false;
5182 // Sure not to connect after defining keywords.
5183 if (Keywords.isTableGenDefinition(Left))
5184 return true;
5187 if (Left.is(TT_ImplicitStringLiteral))
5188 return Right.hasWhitespaceBefore();
5189 if (Line.Type == LT_ObjCMethodDecl) {
5190 if (Left.is(TT_ObjCMethodSpecifier))
5191 return true;
5192 if (Left.is(tok::r_paren) && Left.isNot(TT_AttributeRParen) &&
5193 canBeObjCSelectorComponent(Right)) {
5194 // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a
5195 // keyword in Objective-C, and '+ (instancetype)new;' is a standard class
5196 // method declaration.
5197 return false;
5200 if (Line.Type == LT_ObjCProperty &&
5201 (Right.is(tok::equal) || Left.is(tok::equal))) {
5202 return false;
5205 if (Right.is(TT_TrailingReturnArrow) || Left.is(TT_TrailingReturnArrow))
5206 return true;
5208 if (Left.is(tok::comma) && Right.isNot(TT_OverloadedOperatorLParen) &&
5209 // In an unexpanded macro call we only find the parentheses and commas
5210 // in a line; the commas and closing parenthesis do not require a space.
5211 (Left.Children.empty() || !Left.MacroParent)) {
5212 return true;
5214 if (Right.is(tok::comma))
5215 return false;
5216 if (Right.is(TT_ObjCBlockLParen))
5217 return true;
5218 if (Right.is(TT_CtorInitializerColon))
5219 return Style.SpaceBeforeCtorInitializerColon;
5220 if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon)
5221 return false;
5222 if (Right.is(TT_RangeBasedForLoopColon) &&
5223 !Style.SpaceBeforeRangeBasedForLoopColon) {
5224 return false;
5226 if (Left.is(TT_BitFieldColon)) {
5227 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
5228 Style.BitFieldColonSpacing == FormatStyle::BFCS_After;
5230 if (Right.is(tok::colon)) {
5231 if (Right.is(TT_CaseLabelColon))
5232 return Style.SpaceBeforeCaseColon;
5233 if (Right.is(TT_GotoLabelColon))
5234 return false;
5235 // `private:` and `public:`.
5236 if (!Right.getNextNonComment())
5237 return false;
5238 if (Right.is(TT_ObjCMethodExpr))
5239 return false;
5240 if (Left.is(tok::question))
5241 return false;
5242 if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon))
5243 return false;
5244 if (Right.is(TT_DictLiteral))
5245 return Style.SpacesInContainerLiterals;
5246 if (Right.is(TT_AttributeColon))
5247 return false;
5248 if (Right.is(TT_CSharpNamedArgumentColon))
5249 return false;
5250 if (Right.is(TT_GenericSelectionColon))
5251 return false;
5252 if (Right.is(TT_BitFieldColon)) {
5253 return Style.BitFieldColonSpacing == FormatStyle::BFCS_Both ||
5254 Style.BitFieldColonSpacing == FormatStyle::BFCS_Before;
5256 return true;
5258 // Do not merge "- -" into "--".
5259 if ((Left.isOneOf(tok::minus, tok::minusminus) &&
5260 Right.isOneOf(tok::minus, tok::minusminus)) ||
5261 (Left.isOneOf(tok::plus, tok::plusplus) &&
5262 Right.isOneOf(tok::plus, tok::plusplus))) {
5263 return true;
5265 if (Left.is(TT_UnaryOperator)) {
5266 if (Right.isNot(tok::l_paren)) {
5267 // The alternative operators for ~ and ! are "compl" and "not".
5268 // If they are used instead, we do not want to combine them with
5269 // the token to the right, unless that is a left paren.
5270 if (Left.is(tok::exclaim) && Left.TokenText == "not")
5271 return true;
5272 if (Left.is(tok::tilde) && Left.TokenText == "compl")
5273 return true;
5274 // Lambda captures allow for a lone &, so "&]" needs to be properly
5275 // handled.
5276 if (Left.is(tok::amp) && Right.is(tok::r_square))
5277 return Style.SpacesInSquareBrackets;
5279 return (Style.SpaceAfterLogicalNot && Left.is(tok::exclaim)) ||
5280 Right.is(TT_BinaryOperator);
5283 // If the next token is a binary operator or a selector name, we have
5284 // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly.
5285 if (Left.is(TT_CastRParen)) {
5286 return Style.SpaceAfterCStyleCast ||
5287 Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
5290 auto ShouldAddSpacesInAngles = [this, &Right]() {
5291 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
5292 return true;
5293 if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
5294 return Right.hasWhitespaceBefore();
5295 return false;
5298 if (Left.is(tok::greater) && Right.is(tok::greater)) {
5299 if (Style.Language == FormatStyle::LK_TextProto ||
5300 (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) {
5301 return !Style.Cpp11BracedListStyle;
5303 return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) &&
5304 ((Style.Standard < FormatStyle::LS_Cpp11) ||
5305 ShouldAddSpacesInAngles());
5307 if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) ||
5308 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
5309 (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) {
5310 return false;
5312 if (!Style.SpaceBeforeAssignmentOperators && Left.isNot(TT_TemplateCloser) &&
5313 Right.getPrecedence() == prec::Assignment) {
5314 return false;
5316 if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) &&
5317 (Left.is(tok::identifier) || Left.is(tok::kw_this))) {
5318 return false;
5320 if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) {
5321 // Generally don't remove existing spaces between an identifier and "::".
5322 // The identifier might actually be a macro name such as ALWAYS_INLINE. If
5323 // this turns out to be too lenient, add analysis of the identifier itself.
5324 return Right.hasWhitespaceBefore();
5326 if (Right.is(tok::coloncolon) &&
5327 !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren)) {
5328 // Put a space between < and :: in vector< ::std::string >
5329 return (Left.is(TT_TemplateOpener) &&
5330 ((Style.Standard < FormatStyle::LS_Cpp11) ||
5331 ShouldAddSpacesInAngles())) ||
5332 !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square,
5333 tok::kw___super, TT_TemplateOpener,
5334 TT_TemplateCloser)) ||
5335 (Left.is(tok::l_paren) && Style.SpacesInParensOptions.Other);
5337 if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser)))
5338 return ShouldAddSpacesInAngles();
5339 // Space before TT_StructuredBindingLSquare.
5340 if (Right.is(TT_StructuredBindingLSquare)) {
5341 return !Left.isOneOf(tok::amp, tok::ampamp) ||
5342 getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right;
5344 // Space before & or && following a TT_StructuredBindingLSquare.
5345 if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) &&
5346 Right.isOneOf(tok::amp, tok::ampamp)) {
5347 return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left;
5349 if ((Right.is(TT_BinaryOperator) && Left.isNot(tok::l_paren)) ||
5350 (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) &&
5351 Right.isNot(tok::r_paren))) {
5352 return true;
5354 if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) &&
5355 Left.MatchingParen &&
5356 Left.MatchingParen->is(TT_OverloadedOperatorLParen)) {
5357 return false;
5359 if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
5360 Line.Type == LT_ImportStatement) {
5361 return true;
5363 if (Right.is(TT_TrailingUnaryOperator))
5364 return false;
5365 if (Left.is(TT_RegexLiteral))
5366 return false;
5367 return spaceRequiredBetween(Line, Left, Right);
5370 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
5371 static bool isAllmanBrace(const FormatToken &Tok) {
5372 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
5373 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_LambdaLBrace, TT_DictLiteral);
5376 // Returns 'true' if 'Tok' is a function argument.
5377 static bool IsFunctionArgument(const FormatToken &Tok) {
5378 return Tok.MatchingParen && Tok.MatchingParen->Next &&
5379 Tok.MatchingParen->Next->isOneOf(tok::comma, tok::r_paren);
5382 static bool
5383 isItAnEmptyLambdaAllowed(const FormatToken &Tok,
5384 FormatStyle::ShortLambdaStyle ShortLambdaOption) {
5385 return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None;
5388 static bool isAllmanLambdaBrace(const FormatToken &Tok) {
5389 return Tok.is(tok::l_brace) && Tok.is(BK_Block) &&
5390 !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral);
/// Decides whether a line break is mandatory between \p Right and the token
/// preceding it (\c Right.Previous, called \c Left in the body).
///
/// The function is a sequence of rules, each of which returns as soon as it
/// matches, so rules that appear earlier take precedence over later ones. The
/// rules mix language-specific checks (C#, JavaScript, Java, Verilog, JSON,
/// TableGen, protocol buffers) with checks driven by individual style options.
///
/// \param Line  the annotated line that contains both tokens.
/// \param Right the token in front of which a break is considered.
///              \c Right.Previous is dereferenced without a null check, so
///              \p Right must have a predecessor.
/// \returns \c true if a break must be placed before \p Right; \c false if a
///          rule explicitly forbids forcing one or no rule matched.
5393 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
5394 const FormatToken &Right) const {
5395 const FormatToken &Left = *Right.Previous;
// More than one newline preceded Right in the input and the style keeps at
// least one empty line, so the break has to stay.
5396 if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0)
5397 return true;
5399 if (Style.BreakFunctionDefinitionParameters && Line.MightBeFunctionDecl &&
5400 Line.mightBeFunctionDefinition() && Left.MightBeFunctionDeclParen &&
5401 Left.ParameterCount > 0) {
5402 return true;
5405 if (Style.isCSharp()) {
5406 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace) &&
5407 Style.BraceWrapping.AfterFunction) {
5408 return true;
5410 if (Right.is(TT_CSharpNamedArgumentColon) ||
5411 Left.is(TT_CSharpNamedArgumentColon)) {
5412 return false;
5414 if (Right.is(TT_CSharpGenericTypeConstraint))
5415 return true;
5416 if (Right.Next && Right.Next->is(TT_FatArrow) &&
5417 (Right.is(tok::numeric_constant) ||
5418 (Right.is(tok::identifier) && Right.TokenText == "_"))) {
5419 return true;
5422 // Break after C# [...] and before public/protected/private/internal.
5423 if (Left.is(TT_AttributeSquare) && Left.is(tok::r_square) &&
5424 (Right.isAccessSpecifier(/*ColonRequired=*/false) ||
5425 Right.is(Keywords.kw_internal))) {
5426 return true;
5428 // Break between ] and [ but only when there are really 2 attributes.
5429 if (Left.is(TT_AttributeSquare) && Right.is(TT_AttributeSquare) &&
5430 Left.is(tok::r_square) && Right.is(tok::l_square)) {
5431 return true;
5434 } else if (Style.isJavaScript()) {
5435 // FIXME: This might apply to other languages and token kinds.
5436 if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous &&
5437 Left.Previous->is(tok::string_literal)) {
5438 return true;
5440 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 &&
5441 Left.Previous && Left.Previous->is(tok::equal) &&
5442 Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export,
5443 tok::kw_const) &&
5444 // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match
5445 // above.
5446 !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) {
5447 // Object literals on the top level of a file are treated as "enum-style".
5448 // Each key/value pair is put on a separate line, instead of bin-packing.
5449 return true;
5451 if (Left.is(tok::l_brace) && Line.Level == 0 &&
5452 (Line.startsWith(tok::kw_enum) ||
5453 Line.startsWith(tok::kw_const, tok::kw_enum) ||
5454 Line.startsWith(tok::kw_export, tok::kw_enum) ||
5455 Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) {
5456 // JavaScript top-level enum key/value pairs are put on separate lines
5457 // instead of bin-packing.
5458 return true;
5460 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && Left.Previous &&
5461 Left.Previous->is(TT_FatArrow)) {
5462 // JS arrow function (=> {...}).
5463 switch (Style.AllowShortLambdasOnASingleLine) {
5464 case FormatStyle::SLS_All:
5465 return false;
5466 case FormatStyle::SLS_None:
5467 return true;
5468 case FormatStyle::SLS_Empty:
5469 return !Left.Children.empty();
5470 case FormatStyle::SLS_Inline:
5471 // allow one-lining inline (e.g. in function call args) and empty arrow
5472 // functions.
5473 return (Left.NestingLevel == 0 && Line.Level == 0) &&
5474 !Left.Children.empty();
5476 llvm_unreachable("Unknown FormatStyle::ShortLambdaStyle enum");
5479 if (Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
5480 !Left.Children.empty()) {
5481 // Support AllowShortFunctionsOnASingleLine for JavaScript.
5482 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
5483 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty ||
5484 (Left.NestingLevel == 0 && Line.Level == 0 &&
5485 Style.AllowShortFunctionsOnASingleLine &
5486 FormatStyle::SFS_InlineOnly);
5488 } else if (Style.Language == FormatStyle::LK_Java) {
// Break before a '+' that sits between two string literals.
5489 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
5490 Right.Next->is(tok::string_literal)) {
5491 return true;
5493 } else if (Style.isVerilog()) {
5494 // Break between assignments.
5495 if (Left.is(TT_VerilogAssignComma))
5496 return true;
5497 // Break between ports of different types.
5498 if (Left.is(TT_VerilogTypeComma))
5499 return true;
5500 // Break between ports in a module instantiation and after the parameter
5501 // list.
5502 if (Style.VerilogBreakBetweenInstancePorts &&
5503 (Left.is(TT_VerilogInstancePortComma) ||
5504 (Left.is(tok::r_paren) && Keywords.isVerilogIdentifier(Right) &&
5505 Left.MatchingParen &&
5506 Left.MatchingParen->is(TT_VerilogInstancePortLParen)))) {
5507 return true;
5509 // Break after labels. In Verilog labels don't have the 'case' keyword, so
5510 // it is hard to identify them in UnwrappedLineParser.
5511 if (!Keywords.isVerilogBegin(Right) && Keywords.isVerilogEndOfLabel(Left))
5512 return true;
5513 } else if (Style.BreakAdjacentStringLiterals &&
5514 (IsCpp || Style.isProto() ||
5515 Style.Language == FormatStyle::LK_TableGen)) {
5516 if (Left.isStringLiteral() && Right.isStringLiteral())
5517 return true;
5520 // Basic JSON newline processing.
5521 if (Style.isJson()) {
5522 // Always break after a JSON record opener.
5523 // {
5524 // }
5525 if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace))
5526 return true;
5527 // Always break after a JSON array opener based on BreakArrays.
5528 if ((Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) &&
5529 Right.isNot(tok::r_square)) ||
5530 Left.is(tok::comma)) {
5531 if (Right.is(tok::l_brace))
5532 return true;
5533 // Scan to the right; if we see an object or an array opener before the
5534 // next closing brace or bracket, then break.
5535 for (const auto *Tok = &Right; Tok; Tok = Tok->Next) {
5536 if (Tok->isOneOf(tok::l_brace, tok::l_square))
5537 return true;
5538 if (Tok->isOneOf(tok::r_brace, tok::r_square))
5539 break;
5541 return Style.BreakArrays;
5544 if (Style.isTableGen()) {
5545 // Break after the comma inside cond operators.
5546 // !cond(case1:1,
5547 // case2:0);
5548 if (Left.is(TT_TableGenCondOperatorComma))
5549 return true;
5550 if (Left.is(TT_TableGenDAGArgOperatorToBreak) &&
5551 Right.isNot(TT_TableGenDAGArgCloser)) {
5552 return true;
5554 if (Left.is(TT_TableGenDAGArgListCommaToBreak))
5555 return true;
5556 if (Right.is(TT_TableGenDAGArgCloser) && Right.MatchingParen &&
5557 Right.MatchingParen->is(TT_TableGenDAGArgOpenerToBreak) &&
5558 &Left != Right.MatchingParen->Next) {
5559 // Check to avoid empty DAGArg such as (ins).
5560 return Style.TableGenBreakInsideDAGArg == FormatStyle::DAS_BreakAll;
5564 if (Line.startsWith(tok::kw_asm) && Right.is(TT_InlineASMColon) &&
5565 Style.BreakBeforeInlineASMColon == FormatStyle::BBIAS_Always) {
5566 return true;
5569 // If the last token before a '}', ']', or ')' is a comma or a trailing
5570 // comment, the intention is to insert a line break after it in order to make
5571 // shuffling around entries easier. Import statements, especially in
5572 // JavaScript, can be an exception to this rule.
5573 if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) {
5574 const FormatToken *BeforeClosingBrace = nullptr;
5575 if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) ||
5576 (Style.isJavaScript() && Left.is(tok::l_paren))) &&
5577 Left.isNot(BK_Block) && Left.MatchingParen) {
5578 BeforeClosingBrace = Left.MatchingParen->Previous;
5579 } else if (Right.MatchingParen &&
5580 (Right.MatchingParen->isOneOf(tok::l_brace,
5581 TT_ArrayInitializerLSquare) ||
5582 (Style.isJavaScript() &&
5583 Right.MatchingParen->is(tok::l_paren)))) {
5584 BeforeClosingBrace = &Left;
5586 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
5587 BeforeClosingBrace->isTrailingComment())) {
5588 return true;
// A comment forces a break only when it was preceded by an unescaped newline
// in the input and does not directly follow a braced-init opener or a
// constructor initializer colon.
5592 if (Right.is(tok::comment)) {
5593 return Left.isNot(BK_BracedInit) && Left.isNot(TT_CtorInitializerColon) &&
5594 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
5596 if (Left.isTrailingComment())
5597 return true;
5598 if (Left.IsUnterminatedLiteral)
5599 return true;
// Break before a '<<' that sits between two string literals.
5600 if (Right.is(tok::lessless) && Right.Next && Left.is(tok::string_literal) &&
5601 Right.Next->is(tok::string_literal)) {
5602 return true;
5604 if (Right.is(TT_RequiresClause)) {
5605 switch (Style.RequiresClausePosition) {
5606 case FormatStyle::RCPS_OwnLine:
5607 case FormatStyle::RCPS_WithFollowing:
5608 return true;
5609 default:
5610 break;
5613 // Can break after template<> declaration
5614 if (Left.ClosesTemplateDeclaration && Left.MatchingParen &&
5615 Left.MatchingParen->NestingLevel == 0) {
5616 // Put concepts on the next line e.g.
5617 // template<typename T>
5618 // concept ...
5619 if (Right.is(tok::kw_concept))
5620 return Style.BreakBeforeConceptDeclarations == FormatStyle::BBCDS_Always;
5621 return Style.BreakTemplateDeclarations == FormatStyle::BTDS_Yes ||
5622 (Style.BreakTemplateDeclarations == FormatStyle::BTDS_Leave &&
5623 Right.NewlinesBefore > 0);
5625 if (Left.ClosesRequiresClause && Right.isNot(tok::semi)) {
5626 switch (Style.RequiresClausePosition) {
5627 case FormatStyle::RCPS_OwnLine:
5628 case FormatStyle::RCPS_WithPreceding:
5629 return true;
5630 default:
5631 break;
5634 if (Style.PackConstructorInitializers == FormatStyle::PCIS_Never) {
5635 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon &&
5636 (Left.is(TT_CtorInitializerComma) ||
5637 Right.is(TT_CtorInitializerColon))) {
5638 return true;
5641 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5642 Left.isOneOf(TT_CtorInitializerColon, TT_CtorInitializerComma)) {
5643 return true;
5646 if (Style.PackConstructorInitializers < FormatStyle::PCIS_CurrentLine &&
5647 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma &&
5648 Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) {
5649 return true;
5651 if (Style.PackConstructorInitializers == FormatStyle::PCIS_NextLineOnly) {
5652 if ((Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon ||
5653 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) &&
5654 Right.is(TT_CtorInitializerColon)) {
5655 return true;
5658 if (Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
5659 Left.is(TT_CtorInitializerColon)) {
5660 return true;
5663 // Break only if we have multiple inheritance.
5664 if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma &&
5665 Right.is(TT_InheritanceComma)) {
5666 return true;
5668 if (Style.BreakInheritanceList == FormatStyle::BILS_AfterComma &&
5669 Left.is(TT_InheritanceComma)) {
5670 return true;
5672 if (Right.is(tok::string_literal) && Right.TokenText.starts_with("R\"")) {
5673 // Multiline raw string literals are special wrt. line breaks. The author
5674 // has made a deliberate choice and might have aligned the contents of the
5675 // string literal accordingly. Thus, we try keep existing line breaks.
5676 return Right.IsMultiline && Right.NewlinesBefore > 0;
5678 if ((Left.is(tok::l_brace) || (Left.is(tok::less) && Left.Previous &&
5679 Left.Previous->is(tok::equal))) &&
5680 Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) {
5681 // Don't put enums or option definitions onto single lines in protocol
5682 // buffers.
5683 return true;
5685 if (Right.is(TT_InlineASMBrace))
5686 return Right.HasUnescapedNewline;
5688 if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
5689 auto *FirstNonComment = Line.getFirstNonComment();
5690 bool AccessSpecifier =
5691 FirstNonComment &&
5692 FirstNonComment->isOneOf(Keywords.kw_internal, tok::kw_public,
5693 tok::kw_private, tok::kw_protected);
5695 if (Style.BraceWrapping.AfterEnum) {
5696 if (Line.startsWith(tok::kw_enum) ||
5697 Line.startsWith(tok::kw_typedef, tok::kw_enum)) {
5698 return true;
5700 // Ensure BraceWrapping for `public enum A {`.
5701 if (AccessSpecifier && FirstNonComment->Next &&
5702 FirstNonComment->Next->is(tok::kw_enum)) {
5703 return true;
5707 // Ensure BraceWrapping for `public interface A {`.
5708 if (Style.BraceWrapping.AfterClass &&
5709 ((AccessSpecifier && FirstNonComment->Next &&
5710 FirstNonComment->Next->is(Keywords.kw_interface)) ||
5711 Line.startsWith(Keywords.kw_interface))) {
5712 return true;
5715 // Don't attempt to interpret struct return types as structs.
5716 if (Right.isNot(TT_FunctionLBrace)) {
5717 return (Line.startsWith(tok::kw_class) &&
5718 Style.BraceWrapping.AfterClass) ||
5719 (Line.startsWith(tok::kw_struct) &&
5720 Style.BraceWrapping.AfterStruct);
5724 if (Left.is(TT_ObjCBlockLBrace) &&
5725 Style.AllowShortBlocksOnASingleLine == FormatStyle::SBS_Never) {
5726 return true;
5729 // Ensure wrapping after __attribute__((XX)) and @interface etc.
5730 if (Left.isOneOf(TT_AttributeRParen, TT_AttributeMacro) &&
5731 Right.is(TT_ObjCDecl)) {
5732 return true;
5735 if (Left.is(TT_LambdaLBrace)) {
5736 if (IsFunctionArgument(Left) &&
5737 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline) {
5738 return false;
5741 if (Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_None ||
5742 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Inline ||
5743 (!Left.Children.empty() &&
5744 Style.AllowShortLambdasOnASingleLine == FormatStyle::SLS_Empty)) {
5745 return true;
5749 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) &&
5750 (Left.isPointerOrReference() || Left.is(TT_TemplateCloser))) {
5751 return true;
5754 // Put multiple Java annotation on a new line.
5755 if ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
5756 Left.is(TT_LeadingJavaAnnotation) &&
5757 Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) &&
5758 (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) {
5759 return true;
5762 if (Right.is(TT_ProtoExtensionLSquare))
5763 return true;
5765 // In text proto instances if a submessage contains at least 2 entries and at
5766 // least one of them is a submessage, like A { ... B { ... } ... },
5767 // put all of the entries of A on separate lines by forcing the selector of
5768 // the submessage B to be put on a newline.
5770 // Example: these can stay on one line:
5771 // a { scalar_1: 1 scalar_2: 2 }
5772 // a { b { key: value } }
5774 // and these entries need to be on a new line even if putting them all in one
5775 // line is under the column limit:
5776 // a {
5777 // scalar: 1
5778 // b { key: value }
5779 // }
5781 // We enforce this by breaking before a submessage field that has previous
5782 // siblings, *and* breaking before a field that follows a submessage field.
5784 // Be careful to exclude the case [proto.ext] { ... } since the `]` is
5785 // the TT_SelectorName there, but we don't want to break inside the brackets.
5787 // Another edge case is @submessage { key: value }, which is a common
5788 // substitution placeholder. In this case we want to keep `@` and `submessage`
5789 // together.
5791 // We ensure elsewhere that extensions are always on their own line.
5792 if (Style.isProto() && Right.is(TT_SelectorName) &&
5793 Right.isNot(tok::r_square) && Right.Next) {
5794 // Keep `@submessage` together in:
5795 // @submessage { key: value }
5796 if (Left.is(tok::at))
5797 return false;
5798 // Look for the scope opener after selector in cases like:
5799 // selector { ...
5800 // selector: { ...
5801 // selector: @base { ...
5802 FormatToken *LBrace = Right.Next;
5803 if (LBrace && LBrace->is(tok::colon)) {
5804 LBrace = LBrace->Next;
5805 if (LBrace && LBrace->is(tok::at)) {
5806 LBrace = LBrace->Next;
5807 if (LBrace)
5808 LBrace = LBrace->Next;
5811 if (LBrace &&
5812 // The scope opener is one of {, [, <:
5813 // selector { ... }
5814 // selector [ ... ]
5815 // selector < ... >
5817 // In case of selector { ... }, the l_brace is TT_DictLiteral.
5818 // In case of an empty selector {}, the l_brace is not TT_DictLiteral,
5819 // so we check for immediately following r_brace.
5820 ((LBrace->is(tok::l_brace) &&
5821 (LBrace->is(TT_DictLiteral) ||
5822 (LBrace->Next && LBrace->Next->is(tok::r_brace)))) ||
5823 LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) {
5824 // If Left.ParameterCount is 0, then this submessage entry is not the
5825 // first in its parent submessage, and we want to break before this entry.
5826 // If Left.ParameterCount is greater than 0, then its parent submessage
5827 // might contain 1 or more entries and we want to break before this entry
5828 // if it contains at least 2 entries. We deal with this case later by
5829 // detecting and breaking before the next entry in the parent submessage.
5830 if (Left.ParameterCount == 0)
5831 return true;
5832 // However, if this submessage is the first entry in its parent
5833 // submessage, Left.ParameterCount might be 1 in some cases.
5834 // We deal with this case later by detecting an entry
5835 // following a closing paren of this submessage.
5838 // If this is an entry immediately following a submessage, it will be
5839 // preceded by a closing paren of that submessage, like in:
5840 // left---. .---right
5841 // v v
5842 // sub: { ... } key: value
5843 // If there was a comment between `}` and `key` above, then `key` would be
5844 // put on a new line anyway.
5845 if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square))
5846 return true;
// No rule demanded a break.
5849 return false;
/// Decides whether a line break is permitted between \p Right and the token
/// preceding it (\c Right.Previous, called \c Left in the body).
///
/// The function is a sequence of rules, each of which returns as soon as it
/// matches, so rules that appear earlier take precedence over later ones.
/// Language-specific rules (C#, Java, JavaScript, TableGen) are checked first;
/// the final return statement is the fallback used when no earlier rule
/// applied.
///
/// \param Line  the annotated line that contains both tokens.
/// \param Right the token in front of which a break is considered.
///              \c Right.Previous is dereferenced without a null check, so
///              \p Right must have a predecessor.
/// \returns \c true if breaking before \p Right is allowed, \c false otherwise.
5852 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
5853 const FormatToken &Right) const {
5854 const FormatToken &Left = *Right.Previous;
5855 // Language-specific stuff.
5856 if (Style.isCSharp()) {
5857 if (Left.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon) ||
5858 Right.isOneOf(TT_CSharpNamedArgumentColon, TT_AttributeColon)) {
5859 return false;
5861 // Only break after commas for generic type constraints.
5862 if (Line.First->is(TT_CSharpGenericTypeConstraint))
5863 return Left.is(TT_CSharpGenericTypeConstraintComma);
5864 // Keep nullable operators attached to their identifiers.
5865 if (Right.is(TT_CSharpNullable))
5866 return false;
5867 } else if (Style.Language == FormatStyle::LK_Java) {
5868 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
5869 Keywords.kw_implements)) {
5870 return false;
5872 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
5873 Keywords.kw_implements)) {
5874 return true;
5876 } else if (Style.isJavaScript()) {
5877 const FormatToken *NonComment = Right.getPreviousNonComment();
5878 if (NonComment &&
5879 NonComment->isOneOf(
5880 tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break,
5881 tok::kw_throw, Keywords.kw_interface, Keywords.kw_type,
5882 tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected,
5883 Keywords.kw_readonly, Keywords.kw_override, Keywords.kw_abstract,
5884 Keywords.kw_get, Keywords.kw_set, Keywords.kw_async,
5885 Keywords.kw_await)) {
5886 return false; // Otherwise automatic semicolon insertion would trigger.
5888 if (Right.NestingLevel == 0 &&
5889 (Left.Tok.getIdentifierInfo() ||
5890 Left.isOneOf(tok::r_square, tok::r_paren)) &&
5891 Right.isOneOf(tok::l_square, tok::l_paren)) {
5892 return false; // Otherwise automatic semicolon insertion would trigger.
5894 if (NonComment && NonComment->is(tok::identifier) &&
5895 NonComment->TokenText == "asserts") {
5896 return false;
5898 if (Left.is(TT_FatArrow) && Right.is(tok::l_brace))
5899 return false;
5900 if (Left.is(TT_JsTypeColon))
5901 return true;
5902 // Don't wrap between "!" and ":" of a strict prop init ("field!: type;").
5903 if (Left.is(tok::exclaim) && Right.is(tok::colon))
5904 return false;
5905 // Look for is type annotations like:
5906 // function f(): a is B { ... }
5907 // Do not break before is in these cases.
5908 if (Right.is(Keywords.kw_is)) {
5909 const FormatToken *Next = Right.getNextNonComment();
5910 // If `is` is followed by a colon, it's likely that it's a dict key, so
5911 // ignore it for this check.
5912 // For example this is common in Polymer:
5913 // Polymer({
5914 // is: 'name',
5915 // ...
5916 // });
5917 if (!Next || Next->isNot(tok::colon))
5918 return false;
5920 if (Left.is(Keywords.kw_in))
5921 return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None;
5922 if (Right.is(Keywords.kw_in))
5923 return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None;
5924 if (Right.is(Keywords.kw_as))
5925 return false; // must not break before as in 'x as type' casts
5926 if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) {
5927 // extends and infer can appear as keywords in conditional types:
5928 // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types
5929 // do not break before them, as the expressions are subject to ASI.
5930 return false;
5932 if (Left.is(Keywords.kw_as))
5933 return true;
5934 if (Left.is(TT_NonNullAssertion))
5935 return true;
5936 if (Left.is(Keywords.kw_declare) &&
5937 Right.isOneOf(Keywords.kw_module, tok::kw_namespace,
5938 Keywords.kw_function, tok::kw_class, tok::kw_enum,
5939 Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var,
5940 Keywords.kw_let, tok::kw_const)) {
5941 // See grammar for 'declare' statements at:
5942 // https://github.com/Microsoft/TypeScript/blob/main/doc/spec-ARCHIVED.md#A.10
5943 return false;
5945 if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) &&
5946 Right.isOneOf(tok::identifier, tok::string_literal)) {
5947 return false; // must not break in "module foo { ...}"
5949 if (Right.is(TT_TemplateString) && Right.closesScope())
5950 return false;
5951 // Don't split tagged template literal so there is a break between the tag
5952 // identifier and template string.
5953 if (Left.is(tok::identifier) && Right.is(TT_TemplateString))
5954 return false;
5955 if (Left.is(TT_TemplateString) && Left.opensScope())
5956 return true;
5957 } else if (Style.isTableGen()) {
5958 // Avoid to break after "def", "class", "let" and so on.
5959 if (Keywords.isTableGenDefinition(Left))
5960 return false;
5961 // Avoid to break after '(' in the cases that is in bang operators.
5962 if (Right.is(tok::l_paren)) {
5963 return !Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator,
5964 TT_TemplateCloser);
5966 // Avoid to break between the value and its suffix part.
5967 if (Left.is(TT_TableGenValueSuffix))
5968 return false;
5969 // Avoid to break around paste operator.
5970 if (Left.is(tok::hash) || Right.is(tok::hash))
5971 return false;
5972 if (Left.isOneOf(TT_TableGenBangOperator, TT_TableGenCondOperator))
5973 return false;
5976 if (Left.is(tok::at))
5977 return false;
5978 if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
5979 return false;
5980 if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation))
5981 return Right.isNot(tok::l_paren);
5982 if (Right.is(TT_PointerOrReference)) {
5983 return Line.IsMultiVariableDeclStmt ||
5984 (getTokenPointerOrReferenceAlignment(Right) ==
5985 FormatStyle::PAS_Right &&
5986 (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName)));
5988 if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) ||
5989 Right.is(tok::kw_operator)) {
5990 return true;
5992 if (Left.is(TT_PointerOrReference))
5993 return false;
5994 if (Right.isTrailingComment()) {
5995 // We rely on MustBreakBefore being set correctly here as we should not
5996 // change the "binding" behavior of a comment.
5997 // The first comment in a braced lists is always interpreted as belonging to
5998 // the first list element. Otherwise, it should be placed outside of the
5999 // list.
6000 return Left.is(BK_BracedInit) ||
6001 (Left.is(TT_CtorInitializerColon) && Right.NewlinesBefore > 0 &&
6002 Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon);
6004 if (Left.is(tok::question) && Right.is(tok::colon))
6005 return false;
6006 if (Right.is(TT_ConditionalExpr) || Right.is(tok::question))
6007 return Style.BreakBeforeTernaryOperators;
6008 if (Left.is(TT_ConditionalExpr) || Left.is(tok::question))
6009 return !Style.BreakBeforeTernaryOperators;
6010 if (Left.is(TT_InheritanceColon))
6011 return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon;
6012 if (Right.is(TT_InheritanceColon))
6013 return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon;
6014 if (Right.is(TT_ObjCMethodExpr) && Right.isNot(tok::r_square) &&
6015 Left.isNot(TT_SelectorName)) {
6016 return true;
6019 if (Right.is(tok::colon) &&
6020 !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) {
6021 return false;
6023 if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) {
6024 if (Style.isProto()) {
6025 if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral())
6026 return false;
6027 // Prevent cases like:
6029 // submessage:
6030 // { key: valueeeeeeeeeeee }
6032 // when the snippet does not fit into one line.
6033 // Prefer:
6035 // submessage: {
6036 // key: valueeeeeeeeeeee
6037 // }
6039 // instead, even if it is longer by one line.
6041 // Note that this allows the "{" to go over the column limit
6042 // when the column limit is just between ":" and "{", but that does
6043 // not happen too often and alternative formattings in this case are
6044 // not much better.
6046 // The code covers the cases:
6048 // submessage: { ... }
6049 // submessage: < ... >
6050 // repeated: [ ... ]
6051 if (((Right.is(tok::l_brace) || Right.is(tok::less)) &&
6052 Right.is(TT_DictLiteral)) ||
6053 Right.is(TT_ArrayInitializerLSquare)) {
6054 return false;
6057 return true;
6059 if (Right.is(tok::r_square) && Right.MatchingParen &&
6060 Right.MatchingParen->is(TT_ProtoExtensionLSquare)) {
6061 return false;
6063 if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next &&
6064 Right.Next->is(TT_ObjCMethodExpr))) {
6065 return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls.
6067 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
6068 return true;
6069 if (Right.is(tok::kw_concept))
6070 return Style.BreakBeforeConceptDeclarations != FormatStyle::BBCDS_Never;
6071 if (Right.is(TT_RequiresClause))
6072 return true;
6073 if (Left.ClosesTemplateDeclaration) {
6074 return Style.BreakTemplateDeclarations != FormatStyle::BTDS_Leave ||
6075 Right.NewlinesBefore > 0;
6077 if (Left.is(TT_FunctionAnnotationRParen))
6078 return true;
6079 if (Left.ClosesRequiresClause)
6080 return true;
6081 if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen,
6082 TT_OverloadedOperator)) {
6083 return false;
6085 if (Left.is(TT_RangeBasedForLoopColon))
6086 return true;
6087 if (Right.is(TT_RangeBasedForLoopColon))
6088 return false;
6089 if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener))
6090 return true;
6091 if ((Left.is(tok::greater) && Right.is(tok::greater)) ||
6092 (Left.is(tok::less) && Right.is(tok::less))) {
6093 return false;
6095 if (Right.is(TT_BinaryOperator) &&
6096 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
6097 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
6098 Right.getPrecedence() != prec::Assignment)) {
6099 return true;
6101 if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) ||
6102 Left.is(tok::kw_operator)) {
6103 return false;
6105 if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) &&
6106 Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) {
6107 return false;
6109 if (Left.is(tok::equal) && Right.is(tok::l_brace) &&
6110 !Style.Cpp11BracedListStyle) {
6111 return false;
6113 if (Left.is(TT_AttributeLParen) ||
6114 (Left.is(tok::l_paren) && Left.is(TT_TypeDeclarationParen))) {
6115 return false;
6117 if (Left.is(tok::l_paren) && Left.Previous &&
6118 (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) {
6119 return false;
6121 if (Right.is(TT_ImplicitStringLiteral))
6122 return false;
6124 if (Right.is(TT_TemplateCloser))
6125 return false;
6126 if (Right.is(tok::r_square) && Right.MatchingParen &&
6127 Right.MatchingParen->is(TT_LambdaLSquare)) {
6128 return false;
6131 // We only break before r_brace if there was a corresponding break before
6132 // the l_brace, which is tracked by BreakBeforeClosingBrace.
6133 if (Right.is(tok::r_brace)) {
6134 return Right.MatchingParen && (Right.MatchingParen->is(BK_Block) ||
6135 (Right.isBlockIndentedInitRBrace(Style)));
6138 // We only break before r_paren if we're in a block indented context.
6139 if (Right.is(tok::r_paren)) {
6140 if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent ||
6141 !Right.MatchingParen) {
6142 return false;
6144 auto Next = Right.Next;
6145 if (Next && Next->is(tok::r_paren))
6146 Next = Next->Next;
6147 if (Next && Next->is(tok::l_paren))
6148 return false;
6149 const FormatToken *Previous = Right.MatchingParen->Previous;
6150 return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf()));
6153 // Allow breaking after a trailing annotation, e.g. after a method
6154 // declaration.
6155 if (Left.is(TT_TrailingAnnotation)) {
6156 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
6157 tok::less, tok::coloncolon);
6160 if (Right.isAttribute())
6161 return true;
6163 if (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))
6164 return Left.isNot(TT_AttributeSquare);
6166 if (Left.is(tok::identifier) && Right.is(tok::string_literal))
6167 return true;
6169 if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral))
6170 return true;
6172 if (Left.is(TT_CtorInitializerColon)) {
6173 return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon &&
6174 (!Right.isTrailingComment() || Right.NewlinesBefore > 0);
6176 if (Right.is(TT_CtorInitializerColon))
6177 return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon;
6178 if (Left.is(TT_CtorInitializerComma) &&
6179 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
6180 return false;
6182 if (Right.is(TT_CtorInitializerComma) &&
6183 Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) {
6184 return true;
6186 if (Left.is(TT_InheritanceComma) &&
6187 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
6188 return false;
6190 if (Right.is(TT_InheritanceComma) &&
6191 Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) {
6192 return true;
6194 if (Left.is(TT_ArrayInitializerLSquare))
6195 return true;
6196 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
6197 return true;
6198 if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) &&
6199 !Left.isOneOf(tok::arrowstar, tok::lessless) &&
6200 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
6201 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
6202 Left.getPrecedence() == prec::Assignment)) {
6203 return true;
6205 if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) ||
6206 (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) {
6207 return false;
6210 auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine;
6211 if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) {
6212 if (isAllmanLambdaBrace(Left))
6213 return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption);
6214 if (isAllmanLambdaBrace(Right))
6215 return !isItAnEmptyLambdaAllowed(Right, ShortLambdaOption);
6218 if (Right.is(tok::kw_noexcept) && Right.is(TT_TrailingAnnotation)) {
6219 switch (Style.AllowBreakBeforeNoexceptSpecifier) {
6220 case FormatStyle::BBNSS_Never:
6221 return false;
6222 case FormatStyle::BBNSS_Always:
6223 return true;
6224 case FormatStyle::BBNSS_OnlyWithParen:
6225 return Right.Next && Right.Next->is(tok::l_paren);
// Fallback when no rule above returned: a break is allowed only at the
// token combinations listed here.
6229 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
6230 tok::kw_class, tok::kw_struct, tok::comment) ||
6231 Right.isMemberAccess() ||
6232 Right.isOneOf(TT_TrailingReturnArrow, tok::lessless, tok::colon,
6233 tok::l_square, tok::at) ||
6234 (Left.is(tok::r_paren) &&
6235 Right.isOneOf(tok::identifier, tok::kw_const)) ||
6236 (Left.is(tok::l_paren) && Right.isNot(tok::r_paren)) ||
6237 (Left.is(TT_TemplateOpener) && Right.isNot(TT_TemplateCloser));
6240 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) const {
6241 llvm::errs() << "AnnotatedTokens(L=" << Line.Level << ", P=" << Line.PPLevel
6242 << ", T=" << Line.Type << ", C=" << Line.IsContinuation
6243 << "):\n";
6244 const FormatToken *Tok = Line.First;
6245 while (Tok) {
6246 llvm::errs() << " M=" << Tok->MustBreakBefore
6247 << " C=" << Tok->CanBreakBefore
6248 << " T=" << getTokenTypeName(Tok->getType())
6249 << " S=" << Tok->SpacesRequiredBefore
6250 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
6251 << " BK=" << Tok->getBlockKind() << " P=" << Tok->SplitPenalty
6252 << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
6253 << " PPK=" << Tok->getPackingKind() << " FakeLParens=";
6254 for (prec::Level LParen : Tok->FakeLParens)
6255 llvm::errs() << LParen << "/";
6256 llvm::errs() << " FakeRParens=" << Tok->FakeRParens;
6257 llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo();
6258 llvm::errs() << " Text='" << Tok->TokenText << "'\n";
6259 if (!Tok->Next)
6260 assert(Tok == Line.Last);
6261 Tok = Tok->Next;
6263 llvm::errs() << "----\n";
6266 FormatStyle::PointerAlignmentStyle
6267 TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) const {
6268 assert(Reference.isOneOf(tok::amp, tok::ampamp));
6269 switch (Style.ReferenceAlignment) {
6270 case FormatStyle::RAS_Pointer:
6271 return Style.PointerAlignment;
6272 case FormatStyle::RAS_Left:
6273 return FormatStyle::PAS_Left;
6274 case FormatStyle::RAS_Right:
6275 return FormatStyle::PAS_Right;
6276 case FormatStyle::RAS_Middle:
6277 return FormatStyle::PAS_Middle;
6279 assert(0); //"Unhandled value of ReferenceAlignment"
6280 return Style.PointerAlignment;
6283 FormatStyle::PointerAlignmentStyle
6284 TokenAnnotator::getTokenPointerOrReferenceAlignment(
6285 const FormatToken &PointerOrReference) const {
6286 if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) {
6287 switch (Style.ReferenceAlignment) {
6288 case FormatStyle::RAS_Pointer:
6289 return Style.PointerAlignment;
6290 case FormatStyle::RAS_Left:
6291 return FormatStyle::PAS_Left;
6292 case FormatStyle::RAS_Right:
6293 return FormatStyle::PAS_Right;
6294 case FormatStyle::RAS_Middle:
6295 return FormatStyle::PAS_Middle;
6298 assert(PointerOrReference.is(tok::star));
6299 return Style.PointerAlignment;
6302 } // namespace format
6303 } // namespace clang