lib/Sema/SemaChecking.cpp

   1 //===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 //  This file implements extra semantic analysis beyond what is enforced
  11 //  by the C type system.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "Sema.h"
  16 #include "clang/AST/ASTContext.h"
  17 #include "clang/AST/DeclObjC.h"
  18 #include "clang/AST/ExprCXX.h"
  19 #include "clang/AST/ExprObjC.h"
  20 #include "clang/Lex/Preprocessor.h"
  21 #include "clang/Basic/Diagnostic.h"
  22 #include "SemaUtil.h"
  23 using namespace clang;
  24
  25 /// CheckFunctionCall - Check a direct function call for various correctness
  26 /// and safety properties not strictly enforced by the C type system.
  27 Action::ExprResult
  28 Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCallRaw) {
  29   llvm::OwningPtr<CallExpr> TheCall(TheCallRaw);
  30   // Get the IdentifierInfo* for the called function.
  31   IdentifierInfo *FnInfo = FDecl->getIdentifier();
  32
  33   switch (FnInfo->getBuiltinID()) {
  34   case Builtin::BI__builtin___CFStringMakeConstantString:
  35     assert(TheCall->getNumArgs() == 1 &&
  36            "Wrong # arguments to builtin CFStringMakeConstantString");
  37     if (CheckBuiltinCFStringArgument(TheCall->getArg(0)))
  38       return true;
  39     return TheCall.take();
  40   case Builtin::BI__builtin_stdarg_start:
  41   case Builtin::BI__builtin_va_start:
  42     if (SemaBuiltinVAStart(TheCall.get()))
  43       return true;
  44     return TheCall.take();
  45   case Builtin::BI__builtin_isgreater:
  46   case Builtin::BI__builtin_isgreaterequal:
  47   case Builtin::BI__builtin_isless:
  48   case Builtin::BI__builtin_islessequal:
  49   case Builtin::BI__builtin_islessgreater:
  50   case Builtin::BI__builtin_isunordered:
  51     if (SemaBuiltinUnorderedCompare(TheCall.get()))
  52       return true;
  53     return TheCall.take();
  54   case Builtin::BI__builtin_return_address:
  55   case Builtin::BI__builtin_frame_address:
  56     if (SemaBuiltinStackAddress(TheCall.get()))
  57       return true;
  58     return TheCall.take();
  59   case Builtin::BI__builtin_shufflevector:
  60     return SemaBuiltinShuffleVector(TheCall.get());
  61   case Builtin::BI__builtin_prefetch:
  62     if (SemaBuiltinPrefetch(TheCall.get()))
  63       return true;
  64     return TheCall.take();
  65   case Builtin::BI__builtin_object_size:
  66     if (SemaBuiltinObjectSize(TheCall.get()))
  67       return true;
  68   }
  69
  70   // FIXME: This mechanism should be abstracted to be less fragile and
  71   // more efficient. For example, just map function ids to custom
  72   // handlers.
  73
  74   // Search the KnownFunctionIDs for the identifier.
  75   unsigned i = 0, e = id_num_known_functions;
  76   for (; i != e; ++i) { if (KnownFunctionIDs[i] == FnInfo) break; }
  77   if (i == e) return TheCall.take();
  78
  79   // Printf checking.
  80   if (i <= id_vprintf) {
  81     // Retrieve the index of the format string parameter and determine
  82     // if the function is passed a va_arg argument.
  83     unsigned format_idx = 0;
  84     bool HasVAListArg = false;
  85
  86     switch (i) {
  87     default: assert(false && "No format string argument index.");
  88     case id_NSLog:         format_idx = 0; break;
  89     case id_asprintf:      format_idx = 1; break;
  90     case id_fprintf:       format_idx = 1; break;
  91     case id_printf:        format_idx = 0; break;
  92     case id_snprintf:      format_idx = 2; break;
  93     case id_snprintf_chk:  format_idx = 4; break;
  94     case id_sprintf:       format_idx = 1; break;
  95     case id_sprintf_chk:   format_idx = 3; break;
  96     case id_vasprintf:     format_idx = 1; HasVAListArg = true; break;
  97     case id_vfprintf:      format_idx = 1; HasVAListArg = true; break;
  98     case id_vsnprintf:     format_idx = 2; HasVAListArg = true; break;
  99     case id_vsnprintf_chk: format_idx = 4; HasVAListArg = true; break;
 100     case id_vsprintf:      format_idx = 1; HasVAListArg = true; break;
 101     case id_vsprintf_chk:  format_idx = 3; HasVAListArg = true; break;
 102     case id_vprintf:       format_idx = 0; HasVAListArg = true; break;
 103     }
 104
 105     CheckPrintfArguments(TheCall.get(), HasVAListArg, format_idx);
 106   }
 107
 108   return TheCall.take();
 109 }
 110
 111 /// CheckBuiltinCFStringArgument - Checks that the argument to the builtin
 112 /// CFString constructor is correct
 113 bool Sema::CheckBuiltinCFStringArgument(Expr* Arg) {
 114   Arg = Arg->IgnoreParenCasts();
 115
 116   StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
 117
 118   if (!Literal || Literal->isWide()) {
 119     Diag(Arg->getLocStart(),
 120          diag::err_cfstring_literal_not_string_constant,
 121          Arg->getSourceRange());
 122     return true;
 123   }
 124
 125   const char *Data = Literal->getStrData();
 126   unsigned Length = Literal->getByteLength();
 127
 128   for (unsigned i = 0; i < Length; ++i) {
 129     if (!isascii(Data[i])) {
 130       Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
 131            diag::warn_cfstring_literal_contains_non_ascii_character,
 132            Arg->getSourceRange());
 133       break;
 134     }
 135
 136     if (!Data[i]) {
 137       Diag(PP.AdvanceToTokenCharacter(Arg->getLocStart(), i + 1),
 138            diag::warn_cfstring_literal_contains_nul_character,
 139            Arg->getSourceRange());
 140       break;
 141     }
 142   }
 143
 144   return false;
 145 }
 146
 147 /// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
 148 /// Emit an error and return true on failure, return false on success.
 149 bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
 150   Expr *Fn = TheCall->getCallee();
 151   if (TheCall->getNumArgs() > 2) {
 152     Diag(TheCall->getArg(2)->getLocStart(),
 153          diag::err_typecheck_call_too_many_args, Fn->getSourceRange(),
 154          SourceRange(TheCall->getArg(2)->getLocStart(),
 155                      (*(TheCall->arg_end()-1))->getLocEnd()));
 156     return true;
 157   }
 158
 159   // Determine whether the current function is variadic or not.
 160   bool isVariadic;
 161   if (getCurFunctionDecl())
 162     isVariadic =
 163       cast<FunctionTypeProto>(getCurFunctionDecl()->getType())->isVariadic();
 164   else
 165     isVariadic = getCurMethodDecl()->isVariadic();
 166
 167   if (!isVariadic) {
 168     Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
 169     return true;
 170   }
 171
 172   // Verify that the second argument to the builtin is the last argument of the
 173   // current function or method.
 174   bool SecondArgIsLastNamedArgument = false;
 175   const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
 176
 177   if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
 178     if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
 179       // FIXME: This isn't correct for methods (results in bogus warning).
 180       // Get the last formal in the current function.
 181       const ParmVarDecl *LastArg;
 182       if (getCurFunctionDecl())
 183         LastArg = *(getCurFunctionDecl()->param_end()-1);
 184       else
 185         LastArg = *(getCurMethodDecl()->param_end()-1);
 186       SecondArgIsLastNamedArgument = PV == LastArg;
 187     }
 188   }
 189
 190   if (!SecondArgIsLastNamedArgument)
 191     Diag(TheCall->getArg(1)->getLocStart(),
 192          diag::warn_second_parameter_of_va_start_not_last_named_argument);
 193   return false;
 194 }
 195
 196 /// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
 197 /// friends.  This is declared to take (...), so we have to check everything.
 198 bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
 199   if (TheCall->getNumArgs() < 2)
 200     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args);
 201   if (TheCall->getNumArgs() > 2)
 202     return Diag(TheCall->getArg(2)->getLocStart(),
 203                 diag::err_typecheck_call_too_many_args,
 204                 SourceRange(TheCall->getArg(2)->getLocStart(),
 205                             (*(TheCall->arg_end()-1))->getLocEnd()));
 206
 207   Expr *OrigArg0 = TheCall->getArg(0);
 208   Expr *OrigArg1 = TheCall->getArg(1);
 209
 210   // Do standard promotions between the two arguments, returning their common
 211   // type.
 212   QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
 213
 214   // If the common type isn't a real floating type, then the arguments were
 215   // invalid for this operation.
 216   if (!Res->isRealFloatingType())
 217     return Diag(OrigArg0->getLocStart(),
 218                 diag::err_typecheck_call_invalid_ordered_compare,
 219                 OrigArg0->getType().getAsString(),
 220                 OrigArg1->getType().getAsString(),
 221                 SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd()));
 222
 223   return false;
 224 }
 225
 226 bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
 227   // The signature for these builtins is exact; the only thing we need
 228   // to check is that the argument is a constant.
 229   SourceLocation Loc;
 230   if (!TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
 231     return Diag(Loc, diag::err_stack_const_level, TheCall->getSourceRange());
 232
 233   return false;
 234 }
 235
 236 /// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
 237 // This is declared to take (...), so we have to check everything.
 238 Action::ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
 239   if (TheCall->getNumArgs() < 3)
 240     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args,
 241                 TheCall->getSourceRange());
 242
 243   QualType FAType = TheCall->getArg(0)->getType();
 244   QualType SAType = TheCall->getArg(1)->getType();
 245
 246   if (!FAType->isVectorType() || !SAType->isVectorType()) {
 247     Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector,
 248          SourceRange(TheCall->getArg(0)->getLocStart(),
 249                      TheCall->getArg(1)->getLocEnd()));
 250     return true;
 251   }
 252
 253   if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
 254       Context.getCanonicalType(SAType).getUnqualifiedType()) {
 255     Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector,
 256          SourceRange(TheCall->getArg(0)->getLocStart(),
 257                      TheCall->getArg(1)->getLocEnd()));
 258     return true;
 259   }
 260
 261   unsigned numElements = FAType->getAsVectorType()->getNumElements();
 262   if (TheCall->getNumArgs() != numElements+2) {
 263     if (TheCall->getNumArgs() < numElements+2)
 264       return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args,
 265                   TheCall->getSourceRange());
 266     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args,
 267                 TheCall->getSourceRange());
 268   }
 269
 270   for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
 271     llvm::APSInt Result(32);
 272     if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
 273       return Diag(TheCall->getLocStart(),
 274                   diag::err_shufflevector_nonconstant_argument,
 275                   TheCall->getArg(i)->getSourceRange());
 276
 277     if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
 278       return Diag(TheCall->getLocStart(),
 279                   diag::err_shufflevector_argument_too_large,
 280                   TheCall->getArg(i)->getSourceRange());
 281   }
 282
 283   llvm::SmallVector<Expr*, 32> exprs;
 284
 285   for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
 286     exprs.push_back(TheCall->getArg(i));
 287     TheCall->setArg(i, 0);
 288   }
 289
 290   return new ShuffleVectorExpr(exprs.begin(), numElements+2, FAType,
 291                                TheCall->getCallee()->getLocStart(),
 292                                TheCall->getRParenLoc());
 293 }
 294
 295 /// SemaBuiltinPrefetch - Handle __builtin_prefetch.
 296 // This is declared to take (const void*, ...) and can take two
 297 // optional constant int args.
 298 bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
 299   unsigned numArgs = TheCall->getNumArgs();
 300   bool res = false;
 301
 302   if (numArgs > 3) {
 303     res |= Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args,
 304                 TheCall->getSourceRange());
 305   }
 306
 307   // Argument 0 is checked for us and the remaining arguments must be
 308   // constant integers.
 309   for (unsigned i=1; i<numArgs; ++i) {
 310     Expr *Arg = TheCall->getArg(i);
 311     QualType RWType = Arg->getType();
 312
 313     const BuiltinType *BT = RWType->getAsBuiltinType();
 314     llvm::APSInt Result;
 315     if (!BT || BT->getKind() != BuiltinType::Int ||
 316         !Arg->isIntegerConstantExpr(Result, Context)) {
 317       if (Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument,
 318                SourceRange(Arg->getLocStart(), Arg->getLocEnd()))) {
 319         res = true;
 320         continue;
 321       }
 322     }
 323
 324     // FIXME: gcc issues a warning and rewrites these to 0. These
 325     // seems especially odd for the third argument since the default
 326     // is 3.
 327     if (i==1) {
 328       if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
 329         res |= Diag(TheCall->getLocStart(), diag::err_argument_invalid_range,
 330                     "0", "1",
 331                     SourceRange(Arg->getLocStart(), Arg->getLocEnd()));
 332     } else {
 333       if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
 334         res |= Diag(TheCall->getLocStart(), diag::err_argument_invalid_range,
 335                     "0", "3",
 336                     SourceRange(Arg->getLocStart(), Arg->getLocEnd()));
 337     }
 338   }
 339
 340   return res;
 341 }
 342
 343 /// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
 344 /// int type). This simply type checks that type is one of the defined
 345 /// constants (0-3).
 346 bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
 347   Expr *Arg = TheCall->getArg(1);
 348   QualType ArgType = Arg->getType();
 349   const BuiltinType *BT = ArgType->getAsBuiltinType();
 350   llvm::APSInt Result(32);
 351   if (!BT || BT->getKind() != BuiltinType::Int ||
 352       !Arg->isIntegerConstantExpr(Result, Context)) {
 353     return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument,
 354                 SourceRange(Arg->getLocStart(), Arg->getLocEnd()));
 355   }
 356
 357   if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
 358     return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range,
 359                 "0", "3",
 360                 SourceRange(Arg->getLocStart(), Arg->getLocEnd()));
 361   }
 362
 363   return false;
 364 }
 365
 366 /// CheckPrintfArguments - Check calls to printf (and similar functions) for
 367 /// correct use of format strings.
 368 ///
 369 ///  HasVAListArg - A predicate indicating whether the printf-like
 370 ///    function is passed an explicit va_arg argument (e.g., vprintf)
 371 ///
 372 ///  format_idx - The index into Args for the format string.
 373 ///
 374 /// Improper format strings to functions in the printf family can be
 375 /// the source of bizarre bugs and very serious security holes.  A
 376 /// good source of information is available in the following paper
 377 /// (which includes additional references):
 378 ///
 379 ///  FormatGuard: Automatic Protection From printf Format String
 380 ///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
 381 ///
 382 /// Functionality implemented:
 383 ///
 384 ///  We can statically check the following properties for string
 385 ///  literal format strings for non v.*printf functions (where the
 386 ///  arguments are passed directly):
 387 //
 388 ///  (1) Are the number of format conversions equal to the number of
 389 ///      data arguments?
 390 ///
 391 ///  (2) Does each format conversion correctly match the type of the
 392 ///      corresponding data argument?  (TODO)
 393 ///
 394 /// Moreover, for all printf functions we can:
 395 ///
 396 ///  (3) Check for a missing format string (when not caught by type checking).
 397 ///
 398 ///  (4) Check for no-operation flags; e.g. using "#" with format
 399 ///      conversion 'c'  (TODO)
 400 ///
 401 ///  (5) Check the use of '%n', a major source of security holes.
 402 ///
 403 ///  (6) Check for malformed format conversions that don't specify anything.
 404 ///
 405 ///  (7) Check for empty format strings.  e.g: printf("");
 406 ///
 407 ///  (8) Check that the format string is a wide literal.
 408 ///
 409 ///  (9) Also check the arguments of functions with the __format__ attribute.
 410 ///      (TODO).
 411 ///
 412 /// All of these checks can be done by parsing the format string.
 413 ///
 414 /// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
 415 void
 416 Sema::CheckPrintfArguments(CallExpr *TheCall, bool HasVAListArg,
 417                            unsigned format_idx) {
 418   Expr *Fn = TheCall->getCallee();
 419
 420   // CHECK: printf-like function is called with no format string.
 421   if (format_idx >= TheCall->getNumArgs()) {
 422     Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string,
 423          Fn->getSourceRange());
 424     return;
 425   }
 426
 427   Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
 428
 429   // CHECK: format string is not a string literal.
 430   //
 431   // Dynamically generated format strings are difficult to
 432   // automatically vet at compile time.  Requiring that format strings
 433   // are string literals: (1) permits the checking of format strings by
 434   // the compiler and thereby (2) can practically remove the source of
 435   // many format string exploits.
 436
 437   // Format string can be either ObjC string (e.g. @"%d") or
 438   // C string (e.g. "%d")
 439   // ObjC string uses the same format specifiers as C string, so we can use
 440   // the same format string checking logic for both ObjC and C strings.
 441   ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
 442   StringLiteral *FExpr = NULL;
 443
 444   if(ObjCFExpr != NULL)
 445     FExpr = ObjCFExpr->getString();
 446   else
 447     FExpr = dyn_cast<StringLiteral>(OrigFormatExpr);
 448
 449   if (FExpr == NULL) {
 450     // For vprintf* functions (i.e., HasVAListArg==true), we add a
 451     // special check to see if the format string is a function parameter
 452     // of the function calling the printf function.  If the function
 453     // has an attribute indicating it is a printf-like function, then we
 454     // should suppress warnings concerning non-literals being used in a call
 455     // to a vprintf function.  For example:
 456     //
 457     // void
 458     // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...) {
 459     //      va_list ap;
 460     //      va_start(ap, fmt);
 461     //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
 462     //      ...
 463     //
 464     //
 465     //  FIXME: We don't have full attribute support yet, so just check to see
 466     //    if the argument is a DeclRefExpr that references a parameter.  We'll
 467     //    add proper support for checking the attribute later.
 468     if (HasVAListArg)
 469       if (DeclRefExpr* DR = dyn_cast<DeclRefExpr>(OrigFormatExpr))
 470         if (isa<ParmVarDecl>(DR->getDecl()))
 471           return;
 472
 473     Diag(TheCall->getArg(format_idx)->getLocStart(),
 474          diag::warn_printf_not_string_constant,
 475          OrigFormatExpr->getSourceRange());
 476     return;
 477   }
 478
 479   // CHECK: is the format string a wide literal?
 480   if (FExpr->isWide()) {
 481     Diag(FExpr->getLocStart(),
 482          diag::warn_printf_format_string_is_wide_literal,
 483          OrigFormatExpr->getSourceRange());
 484     return;
 485   }
 486
 487   // Str - The format string.  NOTE: this is NOT null-terminated!
 488   const char * const Str = FExpr->getStrData();
 489
 490   // CHECK: empty format string?
 491   const unsigned StrLen = FExpr->getByteLength();
 492
 493   if (StrLen == 0) {
 494     Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string,
 495          OrigFormatExpr->getSourceRange());
 496     return;
 497   }
 498
 499   // We process the format string using a binary state machine.  The
 500   // current state is stored in CurrentState.
 501   enum {
 502     state_OrdChr,
 503     state_Conversion
 504   } CurrentState = state_OrdChr;
 505
 506   // numConversions - The number of conversions seen so far.  This is
 507   //  incremented as we traverse the format string.
 508   unsigned numConversions = 0;
 509
 510   // numDataArgs - The number of data arguments after the format
 511   //  string.  This can only be determined for non vprintf-like
 512   //  functions.  For those functions, this value is 1 (the sole
 513   //  va_arg argument).
 514   unsigned numDataArgs = TheCall->getNumArgs()-(format_idx+1);
 515
 516   // Inspect the format string.
 517   unsigned StrIdx = 0;
 518
 519   // LastConversionIdx - Index within the format string where we last saw
 520   //  a '%' character that starts a new format conversion.
 521   unsigned LastConversionIdx = 0;
 522
 523   for (; StrIdx < StrLen; ++StrIdx) {
 524
 525     // Is the number of detected conversion conversions greater than
 526     // the number of matching data arguments?  If so, stop.
 527     if (!HasVAListArg && numConversions > numDataArgs) break;
 528
 529     // Handle "\0"
 530     if (Str[StrIdx] == '\0') {
 531       // The string returned by getStrData() is not null-terminated,
 532       // so the presence of a null character is likely an error.
 533       Diag(PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1),
 534            diag::warn_printf_format_string_contains_null_char,
 535            OrigFormatExpr->getSourceRange());
 536       return;
 537     }
 538
 539     // Ordinary characters (not processing a format conversion).
 540     if (CurrentState == state_OrdChr) {
 541       if (Str[StrIdx] == '%') {
 542         CurrentState = state_Conversion;
 543         LastConversionIdx = StrIdx;
 544       }
 545       continue;
 546     }
 547
 548     // Seen '%'.  Now processing a format conversion.
 549     switch (Str[StrIdx]) {
 550     // Handle dynamic precision or width specifier.
 551     case '*': {
 552       ++numConversions;
 553
 554       if (!HasVAListArg && numConversions > numDataArgs) {
 555         SourceLocation Loc = FExpr->getLocStart();
 556         Loc = PP.AdvanceToTokenCharacter(Loc, StrIdx+1);
 557
 558         if (Str[StrIdx-1] == '.')
 559           Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg,
 560                OrigFormatExpr->getSourceRange());
 561         else
 562           Diag(Loc, diag::warn_printf_asterisk_width_missing_arg,
 563                OrigFormatExpr->getSourceRange());
 564
 565         // Don't do any more checking.  We'll just emit spurious errors.
 566         return;
 567       }
 568
 569       // Perform type checking on width/precision specifier.
 570       Expr *E = TheCall->getArg(format_idx+numConversions);
 571       if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
 572         if (BT->getKind() == BuiltinType::Int)
 573           break;
 574
 575       SourceLocation Loc =
 576         PP.AdvanceToTokenCharacter(FExpr->getLocStart(), StrIdx+1);
 577
 578       if (Str[StrIdx-1] == '.')
 579         Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type,
 580              E->getType().getAsString(), E->getSourceRange());
 581       else
 582         Diag(Loc, diag::warn_printf_asterisk_width_wrong_type,
 583              E->getType().getAsString(), E->getSourceRange());
 584
 585       break;
 586     }
 587
 588     // Characters which can terminate a format conversion
 589     // (e.g. "%d").  Characters that specify length modifiers or
 590     // other flags are handled by the default case below.
 591     //
 592     // FIXME: additional checks will go into the following cases.
 593     case 'i':
 594     case 'd':
 595     case 'o':
 596     case 'u':
 597     case 'x':
 598     case 'X':
 599     case 'D':
 600     case 'O':
 601     case 'U':
 602     case 'e':
 603     case 'E':
 604     case 'f':
 605     case 'F':
 606     case 'g':
 607     case 'G':
 608     case 'a':
 609     case 'A':
 610     case 'c':
 611     case 'C':
 612     case 'S':
 613     case 's':
 614     case 'p':
 615       ++numConversions;
 616       CurrentState = state_OrdChr;
 617       break;
 618
 619     // CHECK: Are we using "%n"?  Issue a warning.
 620     case 'n': {
 621       ++numConversions;
 622       CurrentState = state_OrdChr;
 623       SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
 624                                                       LastConversionIdx+1);
 625
 626       Diag(Loc, diag::warn_printf_write_back, OrigFormatExpr->getSourceRange());
 627       break;
 628     }
 629
 630     // Handle "%@"
 631     case '@':
 632       // %@ is allowed in ObjC format strings only.
 633       if(ObjCFExpr != NULL)
 634         CurrentState = state_OrdChr;
 635       else {
 636         // Issue a warning: invalid format conversion.
 637         SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
 638                                                     LastConversionIdx+1);
 639
 640         Diag(Loc, diag::warn_printf_invalid_conversion,
 641           std::string(Str+LastConversionIdx,
 642           Str+std::min(LastConversionIdx+2, StrLen)),
 643           OrigFormatExpr->getSourceRange());
 644       }
 645       ++numConversions;
 646       break;
 647
 648     // Handle "%%"
 649     case '%':
 650       // Sanity check: Was the first "%" character the previous one?
 651       // If not, we will assume that we have a malformed format
 652       // conversion, and that the current "%" character is the start
 653       // of a new conversion.
 654       if (StrIdx - LastConversionIdx == 1)
 655         CurrentState = state_OrdChr;
 656       else {
 657         // Issue a warning: invalid format conversion.
 658         SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
 659                                                         LastConversionIdx+1);
 660
 661         Diag(Loc, diag::warn_printf_invalid_conversion,
 662              std::string(Str+LastConversionIdx, Str+StrIdx),
 663              OrigFormatExpr->getSourceRange());
 664
 665         // This conversion is broken.  Advance to the next format
 666         // conversion.
 667         LastConversionIdx = StrIdx;
 668         ++numConversions;
 669       }
 670       break;
 671
 672     default:
 673       // This case catches all other characters: flags, widths, etc.
 674       // We should eventually process those as well.
 675       break;
 676     }
 677   }
 678
 679   if (CurrentState == state_Conversion) {
 680     // Issue a warning: invalid format conversion.
 681     SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
 682                                                     LastConversionIdx+1);
 683
 684     Diag(Loc, diag::warn_printf_invalid_conversion,
 685          std::string(Str+LastConversionIdx,
 686                      Str+std::min(LastConversionIdx+2, StrLen)),
 687          OrigFormatExpr->getSourceRange());
 688     return;
 689   }
 690
 691   if (!HasVAListArg) {
 692     // CHECK: Does the number of format conversions exceed the number
 693     //        of data arguments?
 694     if (numConversions > numDataArgs) {
 695       SourceLocation Loc = PP.AdvanceToTokenCharacter(FExpr->getLocStart(),
 696                                                       LastConversionIdx);
 697
 698       Diag(Loc, diag::warn_printf_insufficient_data_args,
 699            OrigFormatExpr->getSourceRange());
 700     }
 701     // CHECK: Does the number of data arguments exceed the number of
 702     //        format conversions in the format string?
 703     else if (numConversions < numDataArgs)
 704       Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
 705            diag::warn_printf_too_many_data_args,
 706            OrigFormatExpr->getSourceRange());
 707   }
 708 }
 709
 710 //===--- CHECK: Return Address of Stack Variable --------------------------===//
 711
 712 static DeclRefExpr* EvalVal(Expr *E);
 713 static DeclRefExpr* EvalAddr(Expr* E);
 714
 715 /// CheckReturnStackAddr - Check if a return statement returns the address
 716 ///   of a stack variable.
 717 void
 718 Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
 719                            SourceLocation ReturnLoc) {
 720
 721   // Perform checking for returned stack addresses.
 722   if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
 723     if (DeclRefExpr *DR = EvalAddr(RetValExp))
 724       Diag(DR->getLocStart(), diag::warn_ret_stack_addr,
 725            DR->getDecl()->getIdentifier()->getName(),
 726            RetValExp->getSourceRange());
 727
 728     // Skip over implicit cast expressions when checking for block expressions.
 729     if (ImplicitCastExpr *IcExpr =
 730           dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
 731       RetValExp = IcExpr->getSubExpr();
 732
 733     if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
 734       Diag(C->getLocStart(), diag::err_ret_local_block,
 735            C->getSourceRange());
 736   }
 737   // Perform checking for stack values returned by reference.
 738   else if (lhsType->isReferenceType()) {
 739     // Check for an implicit cast to a reference.
 740     if (ImplicitCastExpr *I = dyn_cast<ImplicitCastExpr>(RetValExp))
 741       if (DeclRefExpr *DR = EvalVal(I->getSubExpr()))
 742         Diag(DR->getLocStart(), diag::warn_ret_stack_ref,
 743              DR->getDecl()->getIdentifier()->getName(),
 744              RetValExp->getSourceRange());
 745   }
 746 }
 747
 748 /// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
 749 ///  check if the expression in a return statement evaluates to an address
 750 ///  to a location on the stack.  The recursion is used to traverse the
 751 ///  AST of the return expression, with recursion backtracking when we
 752 ///  encounter a subexpression that (1) clearly does not lead to the address
 753 ///  of a stack variable or (2) is something we cannot determine leads to
 754 ///  the address of a stack variable based on such local checking.
 755 ///
 756 ///  EvalAddr processes expressions that are pointers that are used as
 757 ///  references (and not L-values).  EvalVal handles all other values.
 758 ///  At the base case of the recursion is a check for a DeclRefExpr* in
 759 ///  the refers to a stack variable.
 760 ///
 761 ///  This implementation handles:
 762 ///
 763 ///   * pointer-to-pointer casts
 764 ///   * implicit conversions from array references to pointers
 765 ///   * taking the address of fields
 766 ///   * arbitrary interplay between "&" and "*" operators
 767 ///   * pointer arithmetic from an address of a stack variable
 768 ///   * taking the address of an array element where the array is on the stack
 769 static DeclRefExpr* EvalAddr(Expr *E) {
 770   // We should only be called for evaluating pointer expressions.
 771   assert((E->getType()->isPointerType() ||
 772           E->getType()->isBlockPointerType() ||
 773           E->getType()->isObjCQualifiedIdType()) &&
 774          "EvalAddr only works on pointers");
 775
 776   // Our "symbolic interpreter" is just a dispatch off the currently
 777   // viewed AST node.  We then recursively traverse the AST by calling
 778   // EvalAddr and EvalVal appropriately.
 779   switch (E->getStmtClass()) {
 780   case Stmt::ParenExprClass:
 781     // Ignore parentheses.
 782     return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
 783
 784   case Stmt::UnaryOperatorClass: {
 785     // The only unary operator that make sense to handle here
 786     // is AddrOf.  All others don't make sense as pointers.
 787     UnaryOperator *U = cast<UnaryOperator>(E);
 788
 789     if (U->getOpcode() == UnaryOperator::AddrOf)
 790       return EvalVal(U->getSubExpr());
 791     else
 792       return NULL;
 793   }
 794
 795   case Stmt::BinaryOperatorClass: {
 796     // Handle pointer arithmetic.  All other binary operators are not valid
 797     // in this context.
 798     BinaryOperator *B = cast<BinaryOperator>(E);
 799     BinaryOperator::Opcode op = B->getOpcode();
 800
 801     if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
 802       return NULL;
 803
 804     Expr *Base = B->getLHS();
 805
 806     // Determine which argument is the real pointer base.  It could be
 807     // the RHS argument instead of the LHS.
 808     if (!Base->getType()->isPointerType()) Base = B->getRHS();
 809
 810     assert (Base->getType()->isPointerType());
 811     return EvalAddr(Base);
 812   }
 813
 814   // For conditional operators we need to see if either the LHS or RHS are
 815   // valid DeclRefExpr*s.  If one of them is valid, we return it.
 816   case Stmt::ConditionalOperatorClass: {
 817     ConditionalOperator *C = cast<ConditionalOperator>(E);
 818
 819     // Handle the GNU extension for missing LHS.
 820     if (Expr *lhsExpr = C->getLHS())
 821       if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
 822         return LHS;
 823
 824      return EvalAddr(C->getRHS());
 825   }
 826
 827   // For casts, we need to handle conversions from arrays to
 828   // pointer values, and pointer-to-pointer conversions.
 829   case Stmt::ExplicitCastExprClass:
 830   case Stmt::ImplicitCastExprClass: {
 831
 832     Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
 833     QualType T = SubExpr->getType();
 834
 835     if (SubExpr->getType()->isPointerType() ||
 836         SubExpr->getType()->isBlockPointerType() ||
 837         SubExpr->getType()->isObjCQualifiedIdType())
 838       return EvalAddr(SubExpr);
 839     else if (T->isArrayType())
 840       return EvalVal(SubExpr);
 841     else
 842       return 0;
 843   }
 844
 845   // C++ casts.  For dynamic casts, static casts, and const casts, we
 846   // are always converting from a pointer-to-pointer, so we just blow
 847   // through the cast.  In the case the dynamic cast doesn't fail
 848   // (and return NULL), we take the conservative route and report cases
 849   // where we return the address of a stack variable.  For Reinterpre
 850   case Stmt::CXXCastExprClass: {
 851     CXXCastExpr *C = cast<CXXCastExpr>(E);
 852
 853     if (C->getOpcode() == CXXCastExpr::ReinterpretCast) {
 854       Expr *S = C->getSubExpr();
 855       if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
 856         return EvalAddr(S);
 857       else
 858         return NULL;
 859     }
 860     else
 861       return EvalAddr(C->getSubExpr());
 862   }
 863
 864   // Everything else: we simply don't reason about them.
 865   default:
 866     return NULL;
 867   }
 868 }
 869
 870
 871 ///  EvalVal - This function is complements EvalAddr in the mutual recursion.
 872 ///   See the comments for EvalAddr for more details.
 873 static DeclRefExpr* EvalVal(Expr *E) {
 874
 875   // We should only be called for evaluating non-pointer expressions, or
 876   // expressions with a pointer type that are not used as references but instead
 877   // are l-values (e.g., DeclRefExpr with a pointer type).
 878
 879   // Our "symbolic interpreter" is just a dispatch off the currently
 880   // viewed AST node.  We then recursively traverse the AST by calling
 881   // EvalAddr and EvalVal appropriately.
 882   switch (E->getStmtClass()) {
 883   case Stmt::DeclRefExprClass: {
 884     // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
 885     //  at code that refers to a variable's name.  We check if it has local
 886     //  storage within the function, and if so, return the expression.
 887     DeclRefExpr *DR = cast<DeclRefExpr>(E);
 888
 889     if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
 890       if(V->hasLocalStorage()) return DR;
 891
 892     return NULL;
 893   }
 894
 895   case Stmt::ParenExprClass:
 896     // Ignore parentheses.
 897     return EvalVal(cast<ParenExpr>(E)->getSubExpr());
 898
 899   case Stmt::UnaryOperatorClass: {
 900     // The only unary operator that make sense to handle here
 901     // is Deref.  All others don't resolve to a "name."  This includes
 902     // handling all sorts of rvalues passed to a unary operator.
 903     UnaryOperator *U = cast<UnaryOperator>(E);
 904
 905     if (U->getOpcode() == UnaryOperator::Deref)
 906       return EvalAddr(U->getSubExpr());
 907
 908     return NULL;
 909   }
 910
 911   case Stmt::ArraySubscriptExprClass: {
 912     // Array subscripts are potential references to data on the stack.  We
 913     // retrieve the DeclRefExpr* for the array variable if it indeed
 914     // has local storage.
 915     return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
 916   }
 917
 918   case Stmt::ConditionalOperatorClass: {
 919     // For conditional operators we need to see if either the LHS or RHS are
 920     // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
 921     ConditionalOperator *C = cast<ConditionalOperator>(E);
 922
 923     // Handle the GNU extension for missing LHS.
 924     if (Expr *lhsExpr = C->getLHS())
 925       if (DeclRefExpr *LHS = EvalVal(lhsExpr))
 926         return LHS;
 927
 928     return EvalVal(C->getRHS());
 929   }
 930
 931   // Accesses to members are potential references to data on the stack.
 932   case Stmt::MemberExprClass: {
 933     MemberExpr *M = cast<MemberExpr>(E);
 934
 935     // Check for indirect access.  We only want direct field accesses.
 936     if (!M->isArrow())
 937       return EvalVal(M->getBase());
 938     else
 939       return NULL;
 940   }
 941
 942   // Everything else: we simply don't reason about them.
 943   default:
 944     return NULL;
 945   }
 946 }
 947
 948 //===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
 949
 950 /// Check for comparisons of floating point operands using != and ==.
 951 /// Issue a warning if these are no self-comparisons, as they are not likely
 952 /// to do what the programmer intended.
 953 void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
 954   bool EmitWarning = true;
 955
 956   Expr* LeftExprSansParen = lex->IgnoreParens();
 957   Expr* RightExprSansParen = rex->IgnoreParens();
 958
 959   // Special case: check for x == x (which is OK).
 960   // Do not emit warnings for such cases.
 961   if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
 962     if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
 963       if (DRL->getDecl() == DRR->getDecl())
 964         EmitWarning = false;
 965
 966
 967   // Special case: check for comparisons against literals that can be exactly
 968   //  represented by APFloat.  In such cases, do not emit a warning.  This
 969   //  is a heuristic: often comparison against such literals are used to
 970   //  detect if a value in a variable has not changed.  This clearly can
 971   //  lead to false negatives.
 972   if (EmitWarning) {
 973     if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
 974       if (FLL->isExact())
 975         EmitWarning = false;
 976     }
 977     else
 978       if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
 979         if (FLR->isExact())
 980           EmitWarning = false;
 981     }
 982   }
 983
 984   // Check for comparisons with builtin types.
 985   if (EmitWarning)
 986     if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
 987       if (isCallBuiltin(CL))
 988         EmitWarning = false;
 989
 990   if (EmitWarning)
 991     if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
 992       if (isCallBuiltin(CR))
 993         EmitWarning = false;
 994
 995   // Emit the diagnostic.
 996   if (EmitWarning)
 997     Diag(loc, diag::warn_floatingpoint_eq,
 998          lex->getSourceRange(),rex->getSourceRange());
 999 }