1 //===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements extra semantic analysis beyond what is enforced
11 // by the C type system.
13 //===----------------------------------------------------------------------===//
16 #include "clang/AST/ASTContext.h"
17 #include "clang/AST/DeclObjC.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/ExprObjC.h"
20 #include "clang/Lex/Preprocessor.h"
21 #include "clang/Basic/Diagnostic.h"
23 using namespace clang
;
25 /// CheckFunctionCall - Check a direct function call for various correctness
26 /// and safety properties not strictly enforced by the C type system.
28 Sema::CheckFunctionCall(FunctionDecl
*FDecl
, CallExpr
*TheCallRaw
) {
29 llvm::OwningPtr
<CallExpr
> TheCall(TheCallRaw
);
30 // Get the IdentifierInfo* for the called function.
31 IdentifierInfo
*FnInfo
= FDecl
->getIdentifier();
33 switch (FnInfo
->getBuiltinID()) {
34 case Builtin::BI__builtin___CFStringMakeConstantString
:
35 assert(TheCall
->getNumArgs() == 1 &&
36 "Wrong # arguments to builtin CFStringMakeConstantString");
37 if (CheckBuiltinCFStringArgument(TheCall
->getArg(0)))
39 return TheCall
.take();
40 case Builtin::BI__builtin_stdarg_start
:
41 case Builtin::BI__builtin_va_start
:
42 if (SemaBuiltinVAStart(TheCall
.get()))
44 return TheCall
.take();
45 case Builtin::BI__builtin_isgreater
:
46 case Builtin::BI__builtin_isgreaterequal
:
47 case Builtin::BI__builtin_isless
:
48 case Builtin::BI__builtin_islessequal
:
49 case Builtin::BI__builtin_islessgreater
:
50 case Builtin::BI__builtin_isunordered
:
51 if (SemaBuiltinUnorderedCompare(TheCall
.get()))
53 return TheCall
.take();
54 case Builtin::BI__builtin_return_address
:
55 case Builtin::BI__builtin_frame_address
:
56 if (SemaBuiltinStackAddress(TheCall
.get()))
58 return TheCall
.take();
59 case Builtin::BI__builtin_shufflevector
:
60 return SemaBuiltinShuffleVector(TheCall
.get());
61 case Builtin::BI__builtin_prefetch
:
62 if (SemaBuiltinPrefetch(TheCall
.get()))
64 return TheCall
.take();
65 case Builtin::BI__builtin_object_size
:
66 if (SemaBuiltinObjectSize(TheCall
.get()))
70 // FIXME: This mechanism should be abstracted to be less fragile and
71 // more efficient. For example, just map function ids to custom
74 // Search the KnownFunctionIDs for the identifier.
75 unsigned i
= 0, e
= id_num_known_functions
;
76 for (; i
!= e
; ++i
) { if (KnownFunctionIDs
[i
] == FnInfo
) break; }
77 if (i
== e
) return TheCall
.take();
80 if (i
<= id_vprintf
) {
81 // Retrieve the index of the format string parameter and determine
82 // if the function is passed a va_arg argument.
83 unsigned format_idx
= 0;
84 bool HasVAListArg
= false;
87 default: assert(false && "No format string argument index.");
88 case id_NSLog
: format_idx
= 0; break;
89 case id_asprintf
: format_idx
= 1; break;
90 case id_fprintf
: format_idx
= 1; break;
91 case id_printf
: format_idx
= 0; break;
92 case id_snprintf
: format_idx
= 2; break;
93 case id_snprintf_chk
: format_idx
= 4; break;
94 case id_sprintf
: format_idx
= 1; break;
95 case id_sprintf_chk
: format_idx
= 3; break;
96 case id_vasprintf
: format_idx
= 1; HasVAListArg
= true; break;
97 case id_vfprintf
: format_idx
= 1; HasVAListArg
= true; break;
98 case id_vsnprintf
: format_idx
= 2; HasVAListArg
= true; break;
99 case id_vsnprintf_chk
: format_idx
= 4; HasVAListArg
= true; break;
100 case id_vsprintf
: format_idx
= 1; HasVAListArg
= true; break;
101 case id_vsprintf_chk
: format_idx
= 3; HasVAListArg
= true; break;
102 case id_vprintf
: format_idx
= 0; HasVAListArg
= true; break;
105 CheckPrintfArguments(TheCall
.get(), HasVAListArg
, format_idx
);
108 return TheCall
.take();
111 /// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
112 /// CFString constructor is correct
113 bool Sema::CheckBuiltinCFStringArgument(Expr
* Arg
) {
114 Arg
= Arg
->IgnoreParenCasts();
116 StringLiteral
*Literal
= dyn_cast
<StringLiteral
>(Arg
);
118 if (!Literal
|| Literal
->isWide()) {
119 Diag(Arg
->getLocStart(),
120 diag::err_cfstring_literal_not_string_constant
,
121 Arg
->getSourceRange());
125 const char *Data
= Literal
->getStrData();
126 unsigned Length
= Literal
->getByteLength();
128 for (unsigned i
= 0; i
< Length
; ++i
) {
129 if (!isascii(Data
[i
])) {
130 Diag(PP
.AdvanceToTokenCharacter(Arg
->getLocStart(), i
+ 1),
131 diag::warn_cfstring_literal_contains_non_ascii_character
,
132 Arg
->getSourceRange());
137 Diag(PP
.AdvanceToTokenCharacter(Arg
->getLocStart(), i
+ 1),
138 diag::warn_cfstring_literal_contains_nul_character
,
139 Arg
->getSourceRange());
147 /// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
148 /// Emit an error and return true on failure, return false on success.
149 bool Sema::SemaBuiltinVAStart(CallExpr
*TheCall
) {
150 Expr
*Fn
= TheCall
->getCallee();
151 if (TheCall
->getNumArgs() > 2) {
152 Diag(TheCall
->getArg(2)->getLocStart(),
153 diag::err_typecheck_call_too_many_args
, Fn
->getSourceRange(),
154 SourceRange(TheCall
->getArg(2)->getLocStart(),
155 (*(TheCall
->arg_end()-1))->getLocEnd()));
159 // Determine whether the current function is variadic or not.
161 if (getCurFunctionDecl())
163 cast
<FunctionTypeProto
>(getCurFunctionDecl()->getType())->isVariadic();
165 isVariadic
= getCurMethodDecl()->isVariadic();
168 Diag(Fn
->getLocStart(), diag::err_va_start_used_in_non_variadic_function
);
172 // Verify that the second argument to the builtin is the last argument of the
173 // current function or method.
174 bool SecondArgIsLastNamedArgument
= false;
175 const Expr
*Arg
= TheCall
->getArg(1)->IgnoreParenCasts();
177 if (const DeclRefExpr
*DR
= dyn_cast
<DeclRefExpr
>(Arg
)) {
178 if (const ParmVarDecl
*PV
= dyn_cast
<ParmVarDecl
>(DR
->getDecl())) {
179 // FIXME: This isn't correct for methods (results in bogus warning).
180 // Get the last formal in the current function.
181 const ParmVarDecl
*LastArg
;
182 if (getCurFunctionDecl())
183 LastArg
= *(getCurFunctionDecl()->param_end()-1);
185 LastArg
= *(getCurMethodDecl()->param_end()-1);
186 SecondArgIsLastNamedArgument
= PV
== LastArg
;
190 if (!SecondArgIsLastNamedArgument
)
191 Diag(TheCall
->getArg(1)->getLocStart(),
192 diag::warn_second_parameter_of_va_start_not_last_named_argument
);
196 /// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
197 /// friends. This is declared to take (...), so we have to check everything.
198 bool Sema::SemaBuiltinUnorderedCompare(CallExpr
*TheCall
) {
199 if (TheCall
->getNumArgs() < 2)
200 return Diag(TheCall
->getLocEnd(), diag::err_typecheck_call_too_few_args
);
201 if (TheCall
->getNumArgs() > 2)
202 return Diag(TheCall
->getArg(2)->getLocStart(),
203 diag::err_typecheck_call_too_many_args
,
204 SourceRange(TheCall
->getArg(2)->getLocStart(),
205 (*(TheCall
->arg_end()-1))->getLocEnd()));
207 Expr
*OrigArg0
= TheCall
->getArg(0);
208 Expr
*OrigArg1
= TheCall
->getArg(1);
210 // Do standard promotions between the two arguments, returning their common
212 QualType Res
= UsualArithmeticConversions(OrigArg0
, OrigArg1
, false);
214 // If the common type isn't a real floating type, then the arguments were
215 // invalid for this operation.
216 if (!Res
->isRealFloatingType())
217 return Diag(OrigArg0
->getLocStart(),
218 diag::err_typecheck_call_invalid_ordered_compare
,
219 OrigArg0
->getType().getAsString(),
220 OrigArg1
->getType().getAsString(),
221 SourceRange(OrigArg0
->getLocStart(), OrigArg1
->getLocEnd()));
226 bool Sema::SemaBuiltinStackAddress(CallExpr
*TheCall
) {
227 // The signature for these builtins is exact; the only thing we need
228 // to check is that the argument is a constant.
230 if (!TheCall
->getArg(0)->isIntegerConstantExpr(Context
, &Loc
))
231 return Diag(Loc
, diag::err_stack_const_level
, TheCall
->getSourceRange());
236 /// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
237 // This is declared to take (...), so we have to check everything.
238 Action::ExprResult
Sema::SemaBuiltinShuffleVector(CallExpr
*TheCall
) {
239 if (TheCall
->getNumArgs() < 3)
240 return Diag(TheCall
->getLocEnd(), diag::err_typecheck_call_too_few_args
,
241 TheCall
->getSourceRange());
243 QualType FAType
= TheCall
->getArg(0)->getType();
244 QualType SAType
= TheCall
->getArg(1)->getType();
246 if (!FAType
->isVectorType() || !SAType
->isVectorType()) {
247 Diag(TheCall
->getLocStart(), diag::err_shufflevector_non_vector
,
248 SourceRange(TheCall
->getArg(0)->getLocStart(),
249 TheCall
->getArg(1)->getLocEnd()));
253 if (Context
.getCanonicalType(FAType
).getUnqualifiedType() !=
254 Context
.getCanonicalType(SAType
).getUnqualifiedType()) {
255 Diag(TheCall
->getLocStart(), diag::err_shufflevector_incompatible_vector
,
256 SourceRange(TheCall
->getArg(0)->getLocStart(),
257 TheCall
->getArg(1)->getLocEnd()));
261 unsigned numElements
= FAType
->getAsVectorType()->getNumElements();
262 if (TheCall
->getNumArgs() != numElements
+2) {
263 if (TheCall
->getNumArgs() < numElements
+2)
264 return Diag(TheCall
->getLocEnd(), diag::err_typecheck_call_too_few_args
,
265 TheCall
->getSourceRange());
266 return Diag(TheCall
->getLocEnd(), diag::err_typecheck_call_too_many_args
,
267 TheCall
->getSourceRange());
270 for (unsigned i
= 2; i
< TheCall
->getNumArgs(); i
++) {
271 llvm::APSInt
Result(32);
272 if (!TheCall
->getArg(i
)->isIntegerConstantExpr(Result
, Context
))
273 return Diag(TheCall
->getLocStart(),
274 diag::err_shufflevector_nonconstant_argument
,
275 TheCall
->getArg(i
)->getSourceRange());
277 if (Result
.getActiveBits() > 64 || Result
.getZExtValue() >= numElements
*2)
278 return Diag(TheCall
->getLocStart(),
279 diag::err_shufflevector_argument_too_large
,
280 TheCall
->getArg(i
)->getSourceRange());
283 llvm::SmallVector
<Expr
*, 32> exprs
;
285 for (unsigned i
= 0, e
= TheCall
->getNumArgs(); i
!= e
; i
++) {
286 exprs
.push_back(TheCall
->getArg(i
));
287 TheCall
->setArg(i
, 0);
290 return new ShuffleVectorExpr(exprs
.begin(), numElements
+2, FAType
,
291 TheCall
->getCallee()->getLocStart(),
292 TheCall
->getRParenLoc());
295 /// SemaBuiltinPrefetch - Handle __builtin_prefetch.
296 // This is declared to take (const void*, ...) and can take two
297 // optional constant int args.
// Diagnostic results are accumulated in `res`. Every argument after the first
// must have builtin type int and be an integer constant expression; its value
// is then range-checked against [0, 1] or [0, 3].
// NOTE(review): this listing has dropped lines (the embedded numbering skips,
// e.g. 299 -> 303 and 318 -> 324), so the declarations of `res` and `Result`
// and several conditions and branch bodies are not visible here.
298 bool Sema::SemaBuiltinPrefetch(CallExpr
*TheCall
) {
299 unsigned numArgs
= TheCall
->getNumArgs();
// Report a too-many-arguments error covering the whole call and fold the
// result of Diag() into res.
303 res
|= Diag(TheCall
->getLocEnd(), diag::err_typecheck_call_too_many_args
,
304 TheCall
->getSourceRange());
307 // Argument 0 is checked for us and the remaining arguments must be
308 // constant integers.
309 for (unsigned i
=1; i
<numArgs
; ++i
) {
310 Expr
*Arg
= TheCall
->getArg(i
);
311 QualType RWType
= Arg
->getType();
313 const BuiltinType
*BT
= RWType
->getAsBuiltinType();
// Reject an argument that is not of builtin type int or is not an integer
// constant expression; on success Result holds the evaluated value.
315 if (!BT
|| BT
->getKind() != BuiltinType::Int
||
316 !Arg
->isIntegerConstantExpr(Result
, Context
)) {
317 if (Diag(TheCall
->getLocStart(), diag::err_prefetch_invalid_argument
,
318 SourceRange(Arg
->getLocStart(), Arg
->getLocEnd()))) {
324 // FIXME: gcc issues a warning and rewrites these to 0. This
325 // seems especially odd for the third argument since the default
// A value outside [0, 1] is reported as err_argument_invalid_range.
328 if (Result
.getSExtValue() < 0 || Result
.getSExtValue() > 1)
329 res
|= Diag(TheCall
->getLocStart(), diag::err_argument_invalid_range
,
331 SourceRange(Arg
->getLocStart(), Arg
->getLocEnd()));
// A value outside [0, 3] is reported as err_argument_invalid_range.
333 if (Result
.getSExtValue() < 0 || Result
.getSExtValue() > 3)
334 res
|= Diag(TheCall
->getLocStart(), diag::err_argument_invalid_range
,
336 SourceRange(Arg
->getLocStart(), Arg
->getLocEnd()));
343 /// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
344 /// int type). This simply type checks that type is one of the defined
// Only the second argument is inspected: it must have builtin type int, be an
// integer constant expression, and evaluate to a value in [0, 3]. Each failed
// check returns the result of Diag().
// NOTE(review): this listing has dropped lines (the embedded numbering skips,
// e.g. 344 -> 346 and 358 -> 360), including the tail of the comment above,
// part of the second Diag call and the end of the function.
346 bool Sema::SemaBuiltinObjectSize(CallExpr
*TheCall
) {
347 Expr
*Arg
= TheCall
->getArg(1);
348 QualType ArgType
= Arg
->getType();
349 const BuiltinType
*BT
= ArgType
->getAsBuiltinType();
// Receives the evaluated constant when isIntegerConstantExpr succeeds.
350 llvm::APSInt
Result(32);
351 if (!BT
|| BT
->getKind() != BuiltinType::Int
||
352 !Arg
->isIntegerConstantExpr(Result
, Context
)) {
353 return Diag(TheCall
->getLocStart(), diag::err_object_size_invalid_argument
,
354 SourceRange(Arg
->getLocStart(), Arg
->getLocEnd()));
// The constant must lie in [0, 3].
357 if (Result
.getSExtValue() < 0 || Result
.getSExtValue() > 3) {
358 return Diag(TheCall
->getLocStart(), diag::err_argument_invalid_range
,
360 SourceRange(Arg
->getLocStart(), Arg
->getLocEnd()));
366 /// CheckPrintfArguments - Check calls to printf (and similar functions) for
367 /// correct use of format strings.
369 /// HasVAListArg - A predicate indicating whether the printf-like
370 /// function is passed an explicit va_arg argument (e.g., vprintf)
372 /// format_idx - The index into Args for the format string.
374 /// Improper format strings to functions in the printf family can be
375 /// the source of bizarre bugs and very serious security holes. A
376 /// good source of information is available in the following paper
377 /// (which includes additional references):
379 /// FormatGuard: Automatic Protection From printf Format String
380 /// Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
382 /// Functionality implemented:
384 /// We can statically check the following properties for string
385 /// literal format strings for non v.*printf functions (where the
386 /// arguments are passed directly):
388 /// (1) Is the number of format conversions equal to the number of
391 /// (2) Does each format conversion correctly match the type of the
392 /// corresponding data argument? (TODO)
394 /// Moreover, for all printf functions we can:
396 /// (3) Check for a missing format string (when not caught by type checking).
398 /// (4) Check for no-operation flags; e.g. using "#" with format
399 /// conversion 'c' (TODO)
401 /// (5) Check the use of '%n', a major source of security holes.
403 /// (6) Check for malformed format conversions that don't specify anything.
405 /// (7) Check for empty format strings. e.g: printf("");
407 /// (8) Check whether the format string is a wide literal (this is warned
/// about).
409 /// (9) Also check the arguments of functions with the __format__ attribute.
412 /// All of these checks can be done by parsing the format string.
414 /// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
// NOTE(review): this listing has dropped lines throughout this function (the
// embedded numbering skips, e.g. 423 -> 427, 504 <- 500 and 592 -> 616). Among
// the lines not visible here are the return type, the case labels of the
// conversion switch, several else/return/break lines and closing braces.
416 Sema::CheckPrintfArguments(CallExpr
*TheCall
, bool HasVAListArg
,
417 unsigned format_idx
) {
418 Expr
*Fn
= TheCall
->getCallee();
420 // CHECK: printf-like function is called with no format string.
421 if (format_idx
>= TheCall
->getNumArgs()) {
422 Diag(TheCall
->getRParenLoc(), diag::warn_printf_missing_format_string
,
423 Fn
->getSourceRange());
// Look through parentheses and casts to reach the expression that supplies
// the format string.
427 Expr
*OrigFormatExpr
= TheCall
->getArg(format_idx
)->IgnoreParenCasts();
429 // CHECK: format string is not a string literal.
431 // Dynamically generated format strings are difficult to
432 // automatically vet at compile time. Requiring that format strings
433 // are string literals: (1) permits the checking of format strings by
434 // the compiler and thereby (2) can practically remove the source of
435 // many format string exploits.
437 // Format string can be either ObjC string (e.g. @"%d") or
438 // C string (e.g. "%d")
439 // ObjC string uses the same format specifiers as C string, so we can use
440 // the same format string checking logic for both ObjC and C strings.
441 ObjCStringLiteral
*ObjCFExpr
= dyn_cast
<ObjCStringLiteral
>(OrigFormatExpr
);
442 StringLiteral
*FExpr
= NULL
;
// For an ObjC literal, check the string literal it wraps; otherwise try the
// expression itself as a C string literal.
444 if(ObjCFExpr
!= NULL
)
445 FExpr
= ObjCFExpr
->getString();
447 FExpr
= dyn_cast
<StringLiteral
>(OrigFormatExpr
);
450 // For vprintf* functions (i.e., HasVAListArg==true), we add a
451 // special check to see if the format string is a function parameter
452 // of the function calling the printf function. If the function
453 // has an attribute indicating it is a printf-like function, then we
454 // should suppress warnings concerning non-literals being used in a call
455 // to a vprintf function. For example:
458 // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
460 // va_start(ap, fmt);
461 // vprintf(fmt, ap); // Do NOT emit a warning about "fmt".
465 // FIXME: We don't have full attribute support yet, so just check to see
466 // if the argument is a DeclRefExpr that references a parameter. We'll
467 // add proper support for checking the attribute later.
469 if (DeclRefExpr
* DR
= dyn_cast
<DeclRefExpr
>(OrigFormatExpr
))
470 if (isa
<ParmVarDecl
>(DR
->getDecl()))
473 Diag(TheCall
->getArg(format_idx
)->getLocStart(),
474 diag::warn_printf_not_string_constant
,
475 OrigFormatExpr
->getSourceRange());
479 // CHECK: is the format string a wide literal?
480 if (FExpr
->isWide()) {
481 Diag(FExpr
->getLocStart(),
482 diag::warn_printf_format_string_is_wide_literal
,
483 OrigFormatExpr
->getSourceRange());
487 // Str - The format string. NOTE: this is NOT null-terminated!
488 const char * const Str
= FExpr
->getStrData();
490 // CHECK: empty format string?
491 const unsigned StrLen
= FExpr
->getByteLength();
494 Diag(FExpr
->getLocStart(), diag::warn_printf_empty_format_string
,
495 OrigFormatExpr
->getSourceRange());
499 // We process the format string using a binary state machine. The
500 // current state is stored in CurrentState.
// Two states are used below: state_OrdChr (ordinary characters) and
// state_Conversion (inside a conversion that started with '%').
504 } CurrentState
= state_OrdChr
;
506 // numConversions - The number of conversions seen so far. This is
507 // incremented as we traverse the format string.
508 unsigned numConversions
= 0;
510 // numDataArgs - The number of data arguments after the format
511 // string. This can only be determined for non vprintf-like
512 // functions. For those functions, this value is 1 (the sole
// Computed as the total argument count minus (format_idx + 1), i.e. every
// argument that follows the format string.
514 unsigned numDataArgs
= TheCall
->getNumArgs()-(format_idx
+1);
516 // Inspect the format string.
519 // LastConversionIdx - Index within the format string where we last saw
520 // a '%' character that starts a new format conversion.
521 unsigned LastConversionIdx
= 0;
523 for (; StrIdx
< StrLen
; ++StrIdx
) {
525 // Is the number of detected format conversions greater than
526 // the number of matching data arguments? If so, stop.
527 if (!HasVAListArg
&& numConversions
> numDataArgs
) break;
530 if (Str
[StrIdx
] == '\0') {
531 // The string returned by getStrData() is not null-terminated,
532 // so the presence of a null character is likely an error.
533 Diag(PP
.AdvanceToTokenCharacter(FExpr
->getLocStart(), StrIdx
+1),
534 diag::warn_printf_format_string_contains_null_char
,
535 OrigFormatExpr
->getSourceRange());
539 // Ordinary characters (not processing a format conversion).
540 if (CurrentState
== state_OrdChr
) {
541 if (Str
[StrIdx
] == '%') {
542 CurrentState
= state_Conversion
;
543 LastConversionIdx
= StrIdx
;
548 // Seen '%'. Now processing a format conversion.
549 switch (Str
[StrIdx
]) {
550 // Handle dynamic precision or width specifier.
554 if (!HasVAListArg
&& numConversions
> numDataArgs
) {
555 SourceLocation Loc
= FExpr
->getLocStart();
556 Loc
= PP
.AdvanceToTokenCharacter(Loc
, StrIdx
+1);
// The character before the current one tells precision ('.') apart from
// width.
558 if (Str
[StrIdx
-1] == '.')
559 Diag(Loc
, diag::warn_printf_asterisk_precision_missing_arg
,
560 OrigFormatExpr
->getSourceRange());
562 Diag(Loc
, diag::warn_printf_asterisk_width_missing_arg
,
563 OrigFormatExpr
->getSourceRange());
565 // Don't do any more checking. We'll just emit spurious errors.
569 // Perform type checking on width/precision specifier.
// The argument examined is the one at index format_idx + numConversions; the
// test below is for builtin type int.
570 Expr
*E
= TheCall
->getArg(format_idx
+numConversions
);
571 if (const BuiltinType
*BT
= E
->getType()->getAsBuiltinType())
572 if (BT
->getKind() == BuiltinType::Int
)
576 PP
.AdvanceToTokenCharacter(FExpr
->getLocStart(), StrIdx
+1);
578 if (Str
[StrIdx
-1] == '.')
579 Diag(Loc
, diag::warn_printf_asterisk_precision_wrong_type
,
580 E
->getType().getAsString(), E
->getSourceRange());
582 Diag(Loc
, diag::warn_printf_asterisk_width_wrong_type
,
583 E
->getType().getAsString(), E
->getSourceRange());
588 // Characters which can terminate a format conversion
589 // (e.g. "%d"). Characters that specify length modifiers or
590 // other flags are handled by the default case below.
592 // FIXME: additional checks will go into the following cases.
616 CurrentState
= state_OrdChr
;
619 // CHECK: Are we using "%n"? Issue a warning.
622 CurrentState
= state_OrdChr
;
// The warning is placed at the '%' that started this conversion.
623 SourceLocation Loc
= PP
.AdvanceToTokenCharacter(FExpr
->getLocStart(),
624 LastConversionIdx
+1);
626 Diag(Loc
, diag::warn_printf_write_back
, OrigFormatExpr
->getSourceRange());
632 // %@ is allowed in ObjC format strings only.
633 if(ObjCFExpr
!= NULL
)
634 CurrentState
= state_OrdChr
;
636 // Issue a warning: invalid format conversion.
637 SourceLocation Loc
= PP
.AdvanceToTokenCharacter(FExpr
->getLocStart(),
638 LastConversionIdx
+1);
// The quoted text is at most the first two characters of the conversion,
// clamped to the end of the string.
640 Diag(Loc
, diag::warn_printf_invalid_conversion
,
641 std::string(Str
+LastConversionIdx
,
642 Str
+std::min(LastConversionIdx
+2, StrLen
)),
643 OrigFormatExpr
->getSourceRange());
650 // Sanity check: Was the first "%" character the previous one?
651 // If not, we will assume that we have a malformed format
652 // conversion, and that the current "%" character is the start
653 // of a new conversion.
654 if (StrIdx
- LastConversionIdx
== 1)
655 CurrentState
= state_OrdChr
;
657 // Issue a warning: invalid format conversion.
658 SourceLocation Loc
= PP
.AdvanceToTokenCharacter(FExpr
->getLocStart(),
659 LastConversionIdx
+1);
// The quoted text runs from the earlier '%' up to, but not including, the
// current character.
661 Diag(Loc
, diag::warn_printf_invalid_conversion
,
662 std::string(Str
+LastConversionIdx
, Str
+StrIdx
),
663 OrigFormatExpr
->getSourceRange());
665 // This conversion is broken. Advance to the next format
667 LastConversionIdx
= StrIdx
;
673 // This case catches all other characters: flags, widths, etc.
674 // We should eventually process those as well.
// Still inside a conversion at this point: report it as invalid.
679 if (CurrentState
== state_Conversion
) {
680 // Issue a warning: invalid format conversion.
681 SourceLocation Loc
= PP
.AdvanceToTokenCharacter(FExpr
->getLocStart(),
682 LastConversionIdx
+1);
684 Diag(Loc
, diag::warn_printf_invalid_conversion
,
685 std::string(Str
+LastConversionIdx
,
686 Str
+std::min(LastConversionIdx
+2, StrLen
)),
687 OrigFormatExpr
->getSourceRange());
692 // CHECK: Does the number of format conversions exceed the number
693 // of data arguments?
694 if (numConversions
> numDataArgs
) {
695 SourceLocation Loc
= PP
.AdvanceToTokenCharacter(FExpr
->getLocStart(),
698 Diag(Loc
, diag::warn_printf_insufficient_data_args
,
699 OrigFormatExpr
->getSourceRange());
701 // CHECK: Does the number of data arguments exceed the number of
702 // format conversions in the format string?
// The warning is located at the first argument that has no matching
// conversion (index format_idx + numConversions + 1).
703 else if (numConversions
< numDataArgs
)
704 Diag(TheCall
->getArg(format_idx
+numConversions
+1)->getLocStart(),
705 diag::warn_printf_too_many_data_args
,
706 OrigFormatExpr
->getSourceRange());
710 //===--- CHECK: Return Address of Stack Variable --------------------------===//
// Forward declarations of the two file-local helpers used by
// CheckReturnStackAddr. Both take an expression and return a DeclRefExpr*;
// they are declared ahead of their definitions because each calls the other.
712 static DeclRefExpr
* EvalVal(Expr
*E
);
713 static DeclRefExpr
* EvalAddr(Expr
* E
);
715 /// CheckReturnStackAddr - Check if a return statement returns the address
716 /// of a stack variable.
718 Sema::CheckReturnStackAddr(Expr
*RetValExp
, QualType lhsType
,
719 SourceLocation ReturnLoc
) {
721 // Perform checking for returned stack addresses.
722 if (lhsType
->isPointerType() || lhsType
->isBlockPointerType()) {
723 if (DeclRefExpr
*DR
= EvalAddr(RetValExp
))
724 Diag(DR
->getLocStart(), diag::warn_ret_stack_addr
,
725 DR
->getDecl()->getIdentifier()->getName(),
726 RetValExp
->getSourceRange());
728 // Skip over implicit cast expressions when checking for block expressions.
729 if (ImplicitCastExpr
*IcExpr
=
730 dyn_cast_or_null
<ImplicitCastExpr
>(RetValExp
))
731 RetValExp
= IcExpr
->getSubExpr();
733 if (BlockExpr
*C
= dyn_cast_or_null
<BlockExpr
>(RetValExp
))
734 Diag(C
->getLocStart(), diag::err_ret_local_block
,
735 C
->getSourceRange());
737 // Perform checking for stack values returned by reference.
738 else if (lhsType
->isReferenceType()) {
739 // Check for an implicit cast to a reference.
740 if (ImplicitCastExpr
*I
= dyn_cast
<ImplicitCastExpr
>(RetValExp
))
741 if (DeclRefExpr
*DR
= EvalVal(I
->getSubExpr()))
742 Diag(DR
->getLocStart(), diag::warn_ret_stack_ref
,
743 DR
->getDecl()->getIdentifier()->getName(),
744 RetValExp
->getSourceRange());
748 /// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
749 /// check if the expression in a return statement evaluates to an address
750 /// to a location on the stack. The recursion is used to traverse the
751 /// AST of the return expression, with recursion backtracking when we
752 /// encounter a subexpression that (1) clearly does not lead to the address
753 /// of a stack variable or (2) is something we cannot determine leads to
754 /// the address of a stack variable based on such local checking.
756 /// EvalAddr processes expressions that are pointers that are used as
757 /// references (and not L-values). EvalVal handles all other values.
758 /// The base case of the recursion is a check for a DeclRefExpr* that
759 /// refers to a stack variable.
761 /// This implementation handles:
763 /// * pointer-to-pointer casts
764 /// * implicit conversions from array references to pointers
765 /// * taking the address of fields
766 /// * arbitrary interplay between "&" and "*" operators
767 /// * pointer arithmetic from an address of a stack variable
768 /// * taking the address of an array element where the array is on the stack
// NOTE(review): this listing has dropped lines inside this function (the
// embedded numbering skips, e.g. 790 -> 795 and 855 -> 861), so the fallback
// return of most cases and the default case are not visible here.
769 static DeclRefExpr
* EvalAddr(Expr
*E
) {
770 // We should only be called for evaluating pointer expressions.
771 assert((E
->getType()->isPointerType() ||
772 E
->getType()->isBlockPointerType() ||
773 E
->getType()->isObjCQualifiedIdType()) &&
774 "EvalAddr only works on pointers");
776 // Our "symbolic interpreter" is just a dispatch off the currently
777 // viewed AST node. We then recursively traverse the AST by calling
778 // EvalAddr and EvalVal appropriately.
779 switch (E
->getStmtClass()) {
780 case Stmt::ParenExprClass
:
781 // Ignore parentheses.
782 return EvalAddr(cast
<ParenExpr
>(E
)->getSubExpr());
784 case Stmt::UnaryOperatorClass
: {
785 // The only unary operator that makes sense to handle here
786 // is AddrOf. All others don't make sense as pointers.
787 UnaryOperator
*U
= cast
<UnaryOperator
>(E
);
// "&x": continue with the operand as a value.
789 if (U
->getOpcode() == UnaryOperator::AddrOf
)
790 return EvalVal(U
->getSubExpr());
795 case Stmt::BinaryOperatorClass
: {
796 // Handle pointer arithmetic. All other binary operators are not valid
798 BinaryOperator
*B
= cast
<BinaryOperator
>(E
);
799 BinaryOperator::Opcode op
= B
->getOpcode();
801 if (op
!= BinaryOperator::Add
&& op
!= BinaryOperator::Sub
)
804 Expr
*Base
= B
->getLHS();
806 // Determine which argument is the real pointer base. It could be
807 // the RHS argument instead of the LHS.
808 if (!Base
->getType()->isPointerType()) Base
= B
->getRHS();
810 assert (Base
->getType()->isPointerType());
811 return EvalAddr(Base
);
814 // For conditional operators we need to see if either the LHS or RHS are
815 // valid DeclRefExpr*s. If one of them is valid, we return it.
816 case Stmt::ConditionalOperatorClass
: {
817 ConditionalOperator
*C
= cast
<ConditionalOperator
>(E
);
819 // Handle the GNU extension for missing LHS.
820 if (Expr
*lhsExpr
= C
->getLHS())
821 if (DeclRefExpr
* LHS
= EvalAddr(lhsExpr
))
824 return EvalAddr(C
->getRHS());
827 // For casts, we need to handle conversions from arrays to
828 // pointer values, and pointer-to-pointer conversions.
829 case Stmt::ExplicitCastExprClass
:
830 case Stmt::ImplicitCastExprClass
: {
832 Expr
* SubExpr
= cast
<CastExpr
>(E
)->getSubExpr();
833 QualType T
= SubExpr
->getType();
// A pointer-like operand is followed as an address; an array operand is
// followed as a value.
835 if (SubExpr
->getType()->isPointerType() ||
836 SubExpr
->getType()->isBlockPointerType() ||
837 SubExpr
->getType()->isObjCQualifiedIdType())
838 return EvalAddr(SubExpr
);
839 else if (T
->isArrayType())
840 return EvalVal(SubExpr
);
845 // C++ casts. For dynamic casts, static casts, and const casts, we
846 // are always converting from a pointer-to-pointer, so we just blow
847 // through the cast. In the case the dynamic cast doesn't fail
848 // (and return NULL), we take the conservative route and report cases
849 // where we return the address of a stack variable. For Reinterpre
850 case Stmt::CXXCastExprClass
: {
851 CXXCastExpr
*C
= cast
<CXXCastExpr
>(E
);
// For reinterpret_cast the operand's type is tested for pointer or block
// pointer first.
853 if (C
->getOpcode() == CXXCastExpr::ReinterpretCast
) {
854 Expr
*S
= C
->getSubExpr();
855 if (S
->getType()->isPointerType() || S
->getType()->isBlockPointerType())
861 return EvalAddr(C
->getSubExpr());
864 // Everything else: we simply don't reason about them.
871 /// EvalVal - This function complements EvalAddr in the mutual recursion.
872 /// See the comments for EvalAddr for more details.
// NOTE(review): this listing has dropped lines inside this function (the
// embedded numbering skips, e.g. 890 -> 895 and 933 -> 935 -> 937), so the
// fallback return of most cases, the condition guarding the member-access
// case and the default case are not visible here.
873 static DeclRefExpr
* EvalVal(Expr
*E
) {
875 // We should only be called for evaluating non-pointer expressions, or
876 // expressions with a pointer type that are not used as references but instead
877 // are l-values (e.g., DeclRefExpr with a pointer type).
879 // Our "symbolic interpreter" is just a dispatch off the currently
880 // viewed AST node. We then recursively traverse the AST by calling
881 // EvalAddr and EvalVal appropriately.
882 switch (E
->getStmtClass()) {
883 case Stmt::DeclRefExprClass
: {
884 // DeclRefExpr: the base case. When we hit a DeclRefExpr we are looking
885 // at code that refers to a variable's name. We check if it has local
886 // storage within the function, and if so, return the expression.
887 DeclRefExpr
*DR
= cast
<DeclRefExpr
>(E
);
889 if (VarDecl
*V
= dyn_cast
<VarDecl
>(DR
->getDecl()))
890 if(V
->hasLocalStorage()) return DR
;
895 case Stmt::ParenExprClass
:
896 // Ignore parentheses.
897 return EvalVal(cast
<ParenExpr
>(E
)->getSubExpr());
899 case Stmt::UnaryOperatorClass
: {
900 // The only unary operator that makes sense to handle here
901 // is Deref. All others don't resolve to a "name." This includes
902 // handling all sorts of rvalues passed to a unary operator.
903 UnaryOperator
*U
= cast
<UnaryOperator
>(E
);
// "*p": continue with the operand as an address.
905 if (U
->getOpcode() == UnaryOperator::Deref
)
906 return EvalAddr(U
->getSubExpr());
911 case Stmt::ArraySubscriptExprClass
: {
912 // Array subscripts are potential references to data on the stack. We
913 // retrieve the DeclRefExpr* for the array variable if it indeed
914 // has local storage.
915 return EvalAddr(cast
<ArraySubscriptExpr
>(E
)->getBase());
918 case Stmt::ConditionalOperatorClass
: {
919 // For conditional operators we need to see if either the LHS or RHS are
920 // non-NULL DeclRefExpr's. If one is non-NULL, we return it.
921 ConditionalOperator
*C
= cast
<ConditionalOperator
>(E
);
923 // Handle the GNU extension for missing LHS.
924 if (Expr
*lhsExpr
= C
->getLHS())
925 if (DeclRefExpr
*LHS
= EvalVal(lhsExpr
))
928 return EvalVal(C
->getRHS());
931 // Accesses to members are potential references to data on the stack.
932 case Stmt::MemberExprClass
: {
933 MemberExpr
*M
= cast
<MemberExpr
>(E
);
935 // Check for indirect access. We only want direct field accesses.
937 return EvalVal(M
->getBase());
942 // Everything else: we simply don't reason about them.
948 //===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
950 /// Check for comparisons of floating point operands using != and ==.
951 /// Issue a warning if these are not self-comparisons, as they are not likely
952 /// to do what the programmer intended.
// The operands are inspected with their parentheses stripped. Three special
// cases are examined before the warning: both operands naming the same
// declaration, an operand that is a floating literal, and an operand that is
// a call for which isCallBuiltin() is true.
// NOTE(review): this listing has dropped lines inside this function (the
// embedded numbering skips, e.g. 963 -> 967 and 973 -> 978), so the bodies of
// the special cases and the end of the function are not visible here.
953 void Sema::CheckFloatComparison(SourceLocation loc
, Expr
* lex
, Expr
*rex
) {
954 bool EmitWarning
= true;
956 Expr
* LeftExprSansParen
= lex
->IgnoreParens();
957 Expr
* RightExprSansParen
= rex
->IgnoreParens();
959 // Special case: check for x == x (which is OK).
960 // Do not emit warnings for such cases.
961 if (DeclRefExpr
* DRL
= dyn_cast
<DeclRefExpr
>(LeftExprSansParen
))
962 if (DeclRefExpr
* DRR
= dyn_cast
<DeclRefExpr
>(RightExprSansParen
))
963 if (DRL
->getDecl() == DRR
->getDecl())
967 // Special case: check for comparisons against literals that can be exactly
968 // represented by APFloat. In such cases, do not emit a warning. This
969 // is a heuristic: often comparison against such literals are used to
970 // detect if a value in a variable has not changed. This clearly can
971 // lead to false negatives.
973 if (FloatingLiteral
* FLL
= dyn_cast
<FloatingLiteral
>(LeftExprSansParen
)) {
978 if (FloatingLiteral
* FLR
= dyn_cast
<FloatingLiteral
>(RightExprSansParen
)){
984 // Check for comparisons with builtin types.
986 if (CallExpr
* CL
= dyn_cast
<CallExpr
>(LeftExprSansParen
))
987 if (isCallBuiltin(CL
))
991 if (CallExpr
* CR
= dyn_cast
<CallExpr
>(RightExprSansParen
))
992 if (isCallBuiltin(CR
))
995 // Emit the diagnostic.
// Both operand source ranges are attached to the warning.
997 Diag(loc
, diag::warn_floatingpoint_eq
,
998 lex
->getSourceRange(),rex
->getSourceRange());