lib/Sema/SemaChecking.cpp

   1 //===--- SemaChecking.cpp - Extra Semantic Checking -----------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 //  This file implements extra semantic analysis beyond what is enforced
  11 //  by the C type system.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "Sema.h"
  16 #include "clang/AST/ASTContext.h"
  17 #include "clang/AST/DeclObjC.h"
  18 #include "clang/AST/ExprCXX.h"
  19 #include "clang/AST/ExprObjC.h"
  20 #include "clang/Lex/LiteralSupport.h"
  21 #include "clang/Lex/Preprocessor.h"
  22 #include <limits>
  23 using namespace clang;
  24
  25 /// getLocationOfStringLiteralByte - Return a source location that points to the
  26 /// specified byte of the specified string literal.
  27 ///
  28 /// Strings are amazingly complex.  They can be formed from multiple tokens and
  29 /// can have escape sequences in them in addition to the usual trigraph and
  30 /// escaped newline business.  This routine handles this complexity.
  31 ///
  32 SourceLocation Sema::getLocationOfStringLiteralByte(const StringLiteral *SL,
  33                                                     unsigned ByteNo) const {
  34   assert(!SL->isWide() && "This doesn't work for wide strings yet");
  35
  36   // Loop over all of the tokens in this string until we find the one that
  37   // contains the byte we're looking for.
  38   unsigned TokNo = 0;
  39   while (1) {
  40     assert(TokNo < SL->getNumConcatenated() && "Invalid byte number!");
  41     SourceLocation StrTokLoc = SL->getStrTokenLoc(TokNo);
  42
  43     // Get the spelling of the string so that we can get the data that makes up
  44     // the string literal, not the identifier for the macro it is potentially
  45     // expanded through.
  46     SourceLocation StrTokSpellingLoc = SourceMgr.getSpellingLoc(StrTokLoc);
  47
  48     // Re-lex the token to get its length and original spelling.
  49     std::pair<FileID, unsigned> LocInfo =
  50       SourceMgr.getDecomposedLoc(StrTokSpellingLoc);
  51     std::pair<const char *,const char *> Buffer =
  52       SourceMgr.getBufferData(LocInfo.first);
  53     const char *StrData = Buffer.first+LocInfo.second;
  54
  55     // Create a langops struct and enable trigraphs.  This is sufficient for
  56     // relexing tokens.
  57     LangOptions LangOpts;
  58     LangOpts.Trigraphs = true;
  59
  60     // Create a lexer starting at the beginning of this token.
  61     Lexer TheLexer(StrTokSpellingLoc, LangOpts, Buffer.first, StrData,
  62                    Buffer.second);
  63     Token TheTok;
  64     TheLexer.LexFromRawLexer(TheTok);
  65
  66     // Use the StringLiteralParser to compute the length of the string in bytes.
  67     StringLiteralParser SLP(&TheTok, 1, PP);
  68     unsigned TokNumBytes = SLP.GetStringLength();
  69
  70     // If the byte is in this token, return the location of the byte.
  71     if (ByteNo < TokNumBytes ||
  72         (ByteNo == TokNumBytes && TokNo == SL->getNumConcatenated())) {
  73       unsigned Offset =
  74         StringLiteralParser::getOffsetOfStringByte(TheTok, ByteNo, PP);
  75
  76       // Now that we know the offset of the token in the spelling, use the
  77       // preprocessor to get the offset in the original source.
  78       return PP.AdvanceToTokenCharacter(StrTokLoc, Offset);
  79     }
  80
  81     // Move to the next string token.
  82     ++TokNo;
  83     ByteNo -= TokNumBytes;
  84   }
  85 }
  86
  87
  88 /// CheckFunctionCall - Check a direct function call for various correctness
  89 /// and safety properties not strictly enforced by the C type system.
  90 Action::OwningExprResult
  91 Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) {
  92   OwningExprResult TheCallResult(Owned(TheCall));
  93   // Get the IdentifierInfo* for the called function.
  94   IdentifierInfo *FnInfo = FDecl->getIdentifier();
  95
  96   // None of the checks below are needed for functions that don't have
  97   // simple names (e.g., C++ conversion functions).
  98   if (!FnInfo)
  99     return move(TheCallResult);
 100
 101   switch (FDecl->getBuiltinID(Context)) {
 102   case Builtin::BI__builtin___CFStringMakeConstantString:
 103     assert(TheCall->getNumArgs() == 1 &&
 104            "Wrong # arguments to builtin CFStringMakeConstantString");
 105     if (CheckObjCString(TheCall->getArg(0)))
 106       return ExprError();
 107     return move(TheCallResult);
 108   case Builtin::BI__builtin_stdarg_start:
 109   case Builtin::BI__builtin_va_start:
 110     if (SemaBuiltinVAStart(TheCall))
 111       return ExprError();
 112     return move(TheCallResult);
 113   case Builtin::BI__builtin_isgreater:
 114   case Builtin::BI__builtin_isgreaterequal:
 115   case Builtin::BI__builtin_isless:
 116   case Builtin::BI__builtin_islessequal:
 117   case Builtin::BI__builtin_islessgreater:
 118   case Builtin::BI__builtin_isunordered:
 119     if (SemaBuiltinUnorderedCompare(TheCall))
 120       return ExprError();
 121     return move(TheCallResult);
 122   case Builtin::BI__builtin_return_address:
 123   case Builtin::BI__builtin_frame_address:
 124     if (SemaBuiltinStackAddress(TheCall))
 125       return ExprError();
 126     return move(TheCallResult);
 127   case Builtin::BI__builtin_shufflevector:
 128     return SemaBuiltinShuffleVector(TheCall);
 129     // TheCall will be freed by the smart pointer here, but that's fine, since
 130     // SemaBuiltinShuffleVector guts it, but then doesn't release it.
 131   case Builtin::BI__builtin_prefetch:
 132     if (SemaBuiltinPrefetch(TheCall))
 133       return ExprError();
 134     return move(TheCallResult);
 135   case Builtin::BI__builtin_object_size:
 136     if (SemaBuiltinObjectSize(TheCall))
 137       return ExprError();
 138     return move(TheCallResult);
 139   case Builtin::BI__builtin_longjmp:
 140     if (SemaBuiltinLongjmp(TheCall))
 141       return ExprError();
 142     return move(TheCallResult);
 143   case Builtin::BI__sync_fetch_and_add:
 144   case Builtin::BI__sync_fetch_and_sub:
 145   case Builtin::BI__sync_fetch_and_or:
 146   case Builtin::BI__sync_fetch_and_and:
 147   case Builtin::BI__sync_fetch_and_xor:
 148   case Builtin::BI__sync_fetch_and_nand:
 149   case Builtin::BI__sync_add_and_fetch:
 150   case Builtin::BI__sync_sub_and_fetch:
 151   case Builtin::BI__sync_and_and_fetch:
 152   case Builtin::BI__sync_or_and_fetch:
 153   case Builtin::BI__sync_xor_and_fetch:
 154   case Builtin::BI__sync_nand_and_fetch:
 155   case Builtin::BI__sync_val_compare_and_swap:
 156   case Builtin::BI__sync_bool_compare_and_swap:
 157   case Builtin::BI__sync_lock_test_and_set:
 158   case Builtin::BI__sync_lock_release:
 159     if (SemaBuiltinAtomicOverloaded(TheCall))
 160       return ExprError();
 161     return move(TheCallResult);
 162   }
 163
 164   // FIXME: This mechanism should be abstracted to be less fragile and
 165   // more efficient. For example, just map function ids to custom
 166   // handlers.
 167
 168   // Printf checking.
 169   if (const FormatAttr *Format = FDecl->getAttr<FormatAttr>()) {
 170     if (Format->getType() == "printf") {
 171       bool HasVAListArg = Format->getFirstArg() == 0;
 172       if (!HasVAListArg) {
 173         if (const FunctionProtoType *Proto
 174             = FDecl->getType()->getAsFunctionProtoType())
 175         HasVAListArg = !Proto->isVariadic();
 176       }
 177       CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
 178                            HasVAListArg ? 0 : Format->getFirstArg() - 1);
 179     }
 180   }
 181   for (const Attr *attr = FDecl->getAttrs();
 182        attr; attr = attr->getNext()) {
 183     if (const NonNullAttr *NonNull = dyn_cast<NonNullAttr>(attr))
 184       CheckNonNullArguments(NonNull, TheCall);
 185   }
 186
 187   return move(TheCallResult);
 188 }
 189
 190 Action::OwningExprResult
 191 Sema::CheckBlockCall(NamedDecl *NDecl, CallExpr *TheCall) {
 192
 193   OwningExprResult TheCallResult(Owned(TheCall));
 194   // Printf checking.
 195   const FormatAttr *Format = NDecl->getAttr<FormatAttr>();
 196   if (!Format)
 197     return move(TheCallResult);
 198   const VarDecl *V = dyn_cast<VarDecl>(NDecl);
 199   if (!V)
 200     return move(TheCallResult);
 201   QualType Ty = V->getType();
 202   if (!Ty->isBlockPointerType())
 203     return move(TheCallResult);
 204   if (Format->getType() == "printf") {
 205       bool HasVAListArg = Format->getFirstArg() == 0;
 206       if (!HasVAListArg) {
 207         const FunctionType *FT =
 208           Ty->getAsBlockPointerType()->getPointeeType()->getAsFunctionType();
 209         if (const FunctionProtoType *Proto = dyn_cast<FunctionProtoType>(FT))
 210           HasVAListArg = !Proto->isVariadic();
 211       }
 212       CheckPrintfArguments(TheCall, HasVAListArg, Format->getFormatIdx() - 1,
 213                            HasVAListArg ? 0 : Format->getFirstArg() - 1);
 214   }
 215   return move(TheCallResult);
 216 }
 217
 218 /// SemaBuiltinAtomicOverloaded - We have a call to a function like
 219 /// __sync_fetch_and_add, which is an overloaded function based on the pointer
 220 /// type of its first argument.  The main ActOnCallExpr routines have already
 221 /// promoted the types of arguments because all of these calls are prototyped as
 222 /// void(...).
 223 ///
 224 /// This function goes through and does final semantic checking for these
 225 /// builtins,
 226 bool Sema::SemaBuiltinAtomicOverloaded(CallExpr *TheCall) {
 227   DeclRefExpr *DRE =cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
 228   FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl());
 229
 230   // Ensure that we have at least one argument to do type inference from.
 231   if (TheCall->getNumArgs() < 1)
 232     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
 233               << 0 << TheCall->getCallee()->getSourceRange();
 234
 235   // Inspect the first argument of the atomic builtin.  This should always be
 236   // a pointer type, whose element is an integral scalar or pointer type.
 237   // Because it is a pointer type, we don't have to worry about any implicit
 238   // casts here.
 239   Expr *FirstArg = TheCall->getArg(0);
 240   if (!FirstArg->getType()->isPointerType())
 241     return Diag(DRE->getLocStart(), diag::err_atomic_builtin_must_be_pointer)
 242              << FirstArg->getType() << FirstArg->getSourceRange();
 243
 244   QualType ValType = FirstArg->getType()->getAsPointerType()->getPointeeType();
 245   if (!ValType->isIntegerType() && !ValType->isPointerType() &&
 246       !ValType->isBlockPointerType())
 247     return Diag(DRE->getLocStart(),
 248                 diag::err_atomic_builtin_must_be_pointer_intptr)
 249              << FirstArg->getType() << FirstArg->getSourceRange();
 250
 251   // We need to figure out which concrete builtin this maps onto.  For example,
 252   // __sync_fetch_and_add with a 2 byte object turns into
 253   // __sync_fetch_and_add_2.
 254 #define BUILTIN_ROW(x) \
 255   { Builtin::BI##x##_1, Builtin::BI##x##_2, Builtin::BI##x##_4, \
 256     Builtin::BI##x##_8, Builtin::BI##x##_16 }
 257
 258   static const unsigned BuiltinIndices[][5] = {
 259     BUILTIN_ROW(__sync_fetch_and_add),
 260     BUILTIN_ROW(__sync_fetch_and_sub),
 261     BUILTIN_ROW(__sync_fetch_and_or),
 262     BUILTIN_ROW(__sync_fetch_and_and),
 263     BUILTIN_ROW(__sync_fetch_and_xor),
 264     BUILTIN_ROW(__sync_fetch_and_nand),
 265
 266     BUILTIN_ROW(__sync_add_and_fetch),
 267     BUILTIN_ROW(__sync_sub_and_fetch),
 268     BUILTIN_ROW(__sync_and_and_fetch),
 269     BUILTIN_ROW(__sync_or_and_fetch),
 270     BUILTIN_ROW(__sync_xor_and_fetch),
 271     BUILTIN_ROW(__sync_nand_and_fetch),
 272
 273     BUILTIN_ROW(__sync_val_compare_and_swap),
 274     BUILTIN_ROW(__sync_bool_compare_and_swap),
 275     BUILTIN_ROW(__sync_lock_test_and_set),
 276     BUILTIN_ROW(__sync_lock_release)
 277   };
 278 #undef BUILTIN_ROW
 279
 280   // Determine the index of the size.
 281   unsigned SizeIndex;
 282   switch (Context.getTypeSize(ValType)/8) {
 283   case 1: SizeIndex = 0; break;
 284   case 2: SizeIndex = 1; break;
 285   case 4: SizeIndex = 2; break;
 286   case 8: SizeIndex = 3; break;
 287   case 16: SizeIndex = 4; break;
 288   default:
 289     return Diag(DRE->getLocStart(), diag::err_atomic_builtin_pointer_size)
 290              << FirstArg->getType() << FirstArg->getSourceRange();
 291   }
 292
 293   // Each of these builtins has one pointer argument, followed by some number of
 294   // values (0, 1 or 2) followed by a potentially empty varags list of stuff
 295   // that we ignore.  Find out which row of BuiltinIndices to read from as well
 296   // as the number of fixed args.
 297   unsigned BuiltinID = FDecl->getBuiltinID(Context);
 298   unsigned BuiltinIndex, NumFixed = 1;
 299   switch (BuiltinID) {
 300   default: assert(0 && "Unknown overloaded atomic builtin!");
 301   case Builtin::BI__sync_fetch_and_add: BuiltinIndex = 0; break;
 302   case Builtin::BI__sync_fetch_and_sub: BuiltinIndex = 1; break;
 303   case Builtin::BI__sync_fetch_and_or:  BuiltinIndex = 2; break;
 304   case Builtin::BI__sync_fetch_and_and: BuiltinIndex = 3; break;
 305   case Builtin::BI__sync_fetch_and_xor: BuiltinIndex = 4; break;
 306   case Builtin::BI__sync_fetch_and_nand:BuiltinIndex = 5; break;
 307
 308   case Builtin::BI__sync_add_and_fetch: BuiltinIndex = 6; break;
 309   case Builtin::BI__sync_sub_and_fetch: BuiltinIndex = 7; break;
 310   case Builtin::BI__sync_and_and_fetch: BuiltinIndex = 8; break;
 311   case Builtin::BI__sync_or_and_fetch:  BuiltinIndex = 9; break;
 312   case Builtin::BI__sync_xor_and_fetch: BuiltinIndex =10; break;
 313   case Builtin::BI__sync_nand_and_fetch:BuiltinIndex =11; break;
 314
 315   case Builtin::BI__sync_val_compare_and_swap:
 316     BuiltinIndex = 12;
 317     NumFixed = 2;
 318     break;
 319   case Builtin::BI__sync_bool_compare_and_swap:
 320     BuiltinIndex = 13;
 321     NumFixed = 2;
 322     break;
 323   case Builtin::BI__sync_lock_test_and_set: BuiltinIndex = 14; break;
 324   case Builtin::BI__sync_lock_release:
 325     BuiltinIndex = 15;
 326     NumFixed = 0;
 327     break;
 328   }
 329
 330   // Now that we know how many fixed arguments we expect, first check that we
 331   // have at least that many.
 332   if (TheCall->getNumArgs() < 1+NumFixed)
 333     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
 334             << 0 << TheCall->getCallee()->getSourceRange();
 335
 336
 337   // Get the decl for the concrete builtin from this, we can tell what the
 338   // concrete integer type we should convert to is.
 339   unsigned NewBuiltinID = BuiltinIndices[BuiltinIndex][SizeIndex];
 340   const char *NewBuiltinName = Context.BuiltinInfo.GetName(NewBuiltinID);
 341   IdentifierInfo *NewBuiltinII = PP.getIdentifierInfo(NewBuiltinName);
 342   FunctionDecl *NewBuiltinDecl =
 343     cast<FunctionDecl>(LazilyCreateBuiltin(NewBuiltinII, NewBuiltinID,
 344                                            TUScope, false, DRE->getLocStart()));
 345   const FunctionProtoType *BuiltinFT =
 346     NewBuiltinDecl->getType()->getAsFunctionProtoType();
 347   ValType = BuiltinFT->getArgType(0)->getAsPointerType()->getPointeeType();
 348
 349   // If the first type needs to be converted (e.g. void** -> int*), do it now.
 350   if (BuiltinFT->getArgType(0) != FirstArg->getType()) {
 351     ImpCastExprToType(FirstArg, BuiltinFT->getArgType(0), false);
 352     TheCall->setArg(0, FirstArg);
 353   }
 354
 355   // Next, walk the valid ones promoting to the right type.
 356   for (unsigned i = 0; i != NumFixed; ++i) {
 357     Expr *Arg = TheCall->getArg(i+1);
 358
 359     // If the argument is an implicit cast, then there was a promotion due to
 360     // "...", just remove it now.
 361     if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(Arg)) {
 362       Arg = ICE->getSubExpr();
 363       ICE->setSubExpr(0);
 364       ICE->Destroy(Context);
 365       TheCall->setArg(i+1, Arg);
 366     }
 367
 368     // GCC does an implicit conversion to the pointer or integer ValType.  This
 369     // can fail in some cases (1i -> int**), check for this error case now.
 370     if (CheckCastTypes(Arg->getSourceRange(), ValType, Arg))
 371       return true;
 372
 373     // Okay, we have something that *can* be converted to the right type.  Check
 374     // to see if there is a potentially weird extension going on here.  This can
 375     // happen when you do an atomic operation on something like an char* and
 376     // pass in 42.  The 42 gets converted to char.  This is even more strange
 377     // for things like 45.123 -> char, etc.
 378     // FIXME: Do this check.
 379     ImpCastExprToType(Arg, ValType, false);
 380     TheCall->setArg(i+1, Arg);
 381   }
 382
 383   // Switch the DeclRefExpr to refer to the new decl.
 384   DRE->setDecl(NewBuiltinDecl);
 385   DRE->setType(NewBuiltinDecl->getType());
 386
 387   // Set the callee in the CallExpr.
 388   // FIXME: This leaks the original parens and implicit casts.
 389   Expr *PromotedCall = DRE;
 390   UsualUnaryConversions(PromotedCall);
 391   TheCall->setCallee(PromotedCall);
 392
 393
 394   // Change the result type of the call to match the result type of the decl.
 395   TheCall->setType(NewBuiltinDecl->getResultType());
 396   return false;
 397 }
 398
 399
 400 /// CheckObjCString - Checks that the argument to the builtin
 401 /// CFString constructor is correct
 402 /// FIXME: GCC currently emits the following warning:
 403 /// "warning: input conversion stopped due to an input byte that does not
 404 ///           belong to the input codeset UTF-8"
 405 /// Note: It might also make sense to do the UTF-16 conversion here (would
 406 /// simplify the backend).
 407 bool Sema::CheckObjCString(Expr *Arg) {
 408   Arg = Arg->IgnoreParenCasts();
 409   StringLiteral *Literal = dyn_cast<StringLiteral>(Arg);
 410
 411   if (!Literal || Literal->isWide()) {
 412     Diag(Arg->getLocStart(), diag::err_cfstring_literal_not_string_constant)
 413       << Arg->getSourceRange();
 414     return true;
 415   }
 416
 417   const char *Data = Literal->getStrData();
 418   unsigned Length = Literal->getByteLength();
 419
 420   for (unsigned i = 0; i < Length; ++i) {
 421     if (!Data[i]) {
 422       Diag(getLocationOfStringLiteralByte(Literal, i),
 423            diag::warn_cfstring_literal_contains_nul_character)
 424         << Arg->getSourceRange();
 425       break;
 426     }
 427   }
 428
 429   return false;
 430 }
 431
 432 /// SemaBuiltinVAStart - Check the arguments to __builtin_va_start for validity.
 433 /// Emit an error and return true on failure, return false on success.
 434 bool Sema::SemaBuiltinVAStart(CallExpr *TheCall) {
 435   Expr *Fn = TheCall->getCallee();
 436   if (TheCall->getNumArgs() > 2) {
 437     Diag(TheCall->getArg(2)->getLocStart(),
 438          diag::err_typecheck_call_too_many_args)
 439       << 0 /*function call*/ << Fn->getSourceRange()
 440       << SourceRange(TheCall->getArg(2)->getLocStart(),
 441                      (*(TheCall->arg_end()-1))->getLocEnd());
 442     return true;
 443   }
 444
 445   if (TheCall->getNumArgs() < 2) {
 446     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
 447       << 0 /*function call*/;
 448   }
 449
 450   // Determine whether the current function is variadic or not.
 451   bool isVariadic;
 452   if (CurBlock)
 453     isVariadic = CurBlock->isVariadic;
 454   else if (getCurFunctionDecl()) {
 455     if (FunctionProtoType* FTP =
 456             dyn_cast<FunctionProtoType>(getCurFunctionDecl()->getType()))
 457       isVariadic = FTP->isVariadic();
 458     else
 459       isVariadic = false;
 460   } else {
 461     isVariadic = getCurMethodDecl()->isVariadic();
 462   }
 463
 464   if (!isVariadic) {
 465     Diag(Fn->getLocStart(), diag::err_va_start_used_in_non_variadic_function);
 466     return true;
 467   }
 468
 469   // Verify that the second argument to the builtin is the last argument of the
 470   // current function or method.
 471   bool SecondArgIsLastNamedArgument = false;
 472   const Expr *Arg = TheCall->getArg(1)->IgnoreParenCasts();
 473
 474   if (const DeclRefExpr *DR = dyn_cast<DeclRefExpr>(Arg)) {
 475     if (const ParmVarDecl *PV = dyn_cast<ParmVarDecl>(DR->getDecl())) {
 476       // FIXME: This isn't correct for methods (results in bogus warning).
 477       // Get the last formal in the current function.
 478       const ParmVarDecl *LastArg;
 479       if (CurBlock)
 480         LastArg = *(CurBlock->TheDecl->param_end()-1);
 481       else if (FunctionDecl *FD = getCurFunctionDecl())
 482         LastArg = *(FD->param_end()-1);
 483       else
 484         LastArg = *(getCurMethodDecl()->param_end()-1);
 485       SecondArgIsLastNamedArgument = PV == LastArg;
 486     }
 487   }
 488
 489   if (!SecondArgIsLastNamedArgument)
 490     Diag(TheCall->getArg(1)->getLocStart(),
 491          diag::warn_second_parameter_of_va_start_not_last_named_argument);
 492   return false;
 493 }
 494
 495 /// SemaBuiltinUnorderedCompare - Handle functions like __builtin_isgreater and
 496 /// friends.  This is declared to take (...), so we have to check everything.
 497 bool Sema::SemaBuiltinUnorderedCompare(CallExpr *TheCall) {
 498   if (TheCall->getNumArgs() < 2)
 499     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_few_args)
 500       << 0 /*function call*/;
 501   if (TheCall->getNumArgs() > 2)
 502     return Diag(TheCall->getArg(2)->getLocStart(),
 503                 diag::err_typecheck_call_too_many_args)
 504       << 0 /*function call*/
 505       << SourceRange(TheCall->getArg(2)->getLocStart(),
 506                      (*(TheCall->arg_end()-1))->getLocEnd());
 507
 508   Expr *OrigArg0 = TheCall->getArg(0);
 509   Expr *OrigArg1 = TheCall->getArg(1);
 510
 511   // Do standard promotions between the two arguments, returning their common
 512   // type.
 513   QualType Res = UsualArithmeticConversions(OrigArg0, OrigArg1, false);
 514
 515   // Make sure any conversions are pushed back into the call; this is
 516   // type safe since unordered compare builtins are declared as "_Bool
 517   // foo(...)".
 518   TheCall->setArg(0, OrigArg0);
 519   TheCall->setArg(1, OrigArg1);
 520
 521   if (OrigArg0->isTypeDependent() || OrigArg1->isTypeDependent())
 522     return false;
 523
 524   // If the common type isn't a real floating type, then the arguments were
 525   // invalid for this operation.
 526   if (!Res->isRealFloatingType())
 527     return Diag(OrigArg0->getLocStart(),
 528                 diag::err_typecheck_call_invalid_ordered_compare)
 529       << OrigArg0->getType() << OrigArg1->getType()
 530       << SourceRange(OrigArg0->getLocStart(), OrigArg1->getLocEnd());
 531
 532   return false;
 533 }
 534
 535 bool Sema::SemaBuiltinStackAddress(CallExpr *TheCall) {
 536   // The signature for these builtins is exact; the only thing we need
 537   // to check is that the argument is a constant.
 538   SourceLocation Loc;
 539   if (!TheCall->getArg(0)->isTypeDependent() &&
 540       !TheCall->getArg(0)->isValueDependent() &&
 541       !TheCall->getArg(0)->isIntegerConstantExpr(Context, &Loc))
 542     return Diag(Loc, diag::err_stack_const_level) << TheCall->getSourceRange();
 543
 544   return false;
 545 }
 546
 547 /// SemaBuiltinShuffleVector - Handle __builtin_shufflevector.
 548 // This is declared to take (...), so we have to check everything.
 549 Action::OwningExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) {
 550   if (TheCall->getNumArgs() < 3)
 551     return ExprError(Diag(TheCall->getLocEnd(),
 552                           diag::err_typecheck_call_too_few_args)
 553       << 0 /*function call*/ << TheCall->getSourceRange());
 554
 555   unsigned numElements = std::numeric_limits<unsigned>::max();
 556   if (!TheCall->getArg(0)->isTypeDependent() &&
 557       !TheCall->getArg(1)->isTypeDependent()) {
 558     QualType FAType = TheCall->getArg(0)->getType();
 559     QualType SAType = TheCall->getArg(1)->getType();
 560
 561     if (!FAType->isVectorType() || !SAType->isVectorType()) {
 562       Diag(TheCall->getLocStart(), diag::err_shufflevector_non_vector)
 563         << SourceRange(TheCall->getArg(0)->getLocStart(),
 564                        TheCall->getArg(1)->getLocEnd());
 565       return ExprError();
 566     }
 567
 568     if (Context.getCanonicalType(FAType).getUnqualifiedType() !=
 569         Context.getCanonicalType(SAType).getUnqualifiedType()) {
 570       Diag(TheCall->getLocStart(), diag::err_shufflevector_incompatible_vector)
 571         << SourceRange(TheCall->getArg(0)->getLocStart(),
 572                        TheCall->getArg(1)->getLocEnd());
 573       return ExprError();
 574     }
 575
 576     numElements = FAType->getAsVectorType()->getNumElements();
 577     if (TheCall->getNumArgs() != numElements+2) {
 578       if (TheCall->getNumArgs() < numElements+2)
 579         return ExprError(Diag(TheCall->getLocEnd(),
 580                               diag::err_typecheck_call_too_few_args)
 581                  << 0 /*function call*/ << TheCall->getSourceRange());
 582       return ExprError(Diag(TheCall->getLocEnd(),
 583                             diag::err_typecheck_call_too_many_args)
 584                  << 0 /*function call*/ << TheCall->getSourceRange());
 585     }
 586   }
 587
 588   for (unsigned i = 2; i < TheCall->getNumArgs(); i++) {
 589     if (TheCall->getArg(i)->isTypeDependent() ||
 590         TheCall->getArg(i)->isValueDependent())
 591       continue;
 592
 593     llvm::APSInt Result(32);
 594     if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context))
 595       return ExprError(Diag(TheCall->getLocStart(),
 596                   diag::err_shufflevector_nonconstant_argument)
 597                 << TheCall->getArg(i)->getSourceRange());
 598
 599     if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2)
 600       return ExprError(Diag(TheCall->getLocStart(),
 601                   diag::err_shufflevector_argument_too_large)
 602                << TheCall->getArg(i)->getSourceRange());
 603   }
 604
 605   llvm::SmallVector<Expr*, 32> exprs;
 606
 607   for (unsigned i = 0, e = TheCall->getNumArgs(); i != e; i++) {
 608     exprs.push_back(TheCall->getArg(i));
 609     TheCall->setArg(i, 0);
 610   }
 611
 612   return Owned(new (Context) ShuffleVectorExpr(exprs.begin(), exprs.size(),
 613                                                exprs[0]->getType(),
 614                                             TheCall->getCallee()->getLocStart(),
 615                                             TheCall->getRParenLoc()));
 616 }
 617
 618 /// SemaBuiltinPrefetch - Handle __builtin_prefetch.
 619 // This is declared to take (const void*, ...) and can take two
 620 // optional constant int args.
 621 bool Sema::SemaBuiltinPrefetch(CallExpr *TheCall) {
 622   unsigned NumArgs = TheCall->getNumArgs();
 623
 624   if (NumArgs > 3)
 625     return Diag(TheCall->getLocEnd(), diag::err_typecheck_call_too_many_args)
 626              << 0 /*function call*/ << TheCall->getSourceRange();
 627
 628   // Argument 0 is checked for us and the remaining arguments must be
 629   // constant integers.
 630   for (unsigned i = 1; i != NumArgs; ++i) {
 631     Expr *Arg = TheCall->getArg(i);
 632     if (Arg->isTypeDependent())
 633       continue;
 634
 635     QualType RWType = Arg->getType();
 636
 637     const BuiltinType *BT = RWType->getAsBuiltinType();
 638     llvm::APSInt Result;
 639     if (!BT || BT->getKind() != BuiltinType::Int)
 640       return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
 641               << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 642
 643     if (Arg->isValueDependent())
 644       continue;
 645
 646     if (!Arg->isIntegerConstantExpr(Result, Context))
 647       return Diag(TheCall->getLocStart(), diag::err_prefetch_invalid_argument)
 648         << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 649
 650     // FIXME: gcc issues a warning and rewrites these to 0. These
 651     // seems especially odd for the third argument since the default
 652     // is 3.
 653     if (i == 1) {
 654       if (Result.getSExtValue() < 0 || Result.getSExtValue() > 1)
 655         return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
 656              << "0" << "1" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 657     } else {
 658       if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3)
 659         return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
 660             << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 661     }
 662   }
 663
 664   return false;
 665 }
 666
 667 /// SemaBuiltinObjectSize - Handle __builtin_object_size(void *ptr,
 668 /// int type). This simply type checks that type is one of the defined
 669 /// constants (0-3).
 670 bool Sema::SemaBuiltinObjectSize(CallExpr *TheCall) {
 671   Expr *Arg = TheCall->getArg(1);
 672   if (Arg->isTypeDependent())
 673     return false;
 674
 675   QualType ArgType = Arg->getType();
 676   const BuiltinType *BT = ArgType->getAsBuiltinType();
 677   llvm::APSInt Result(32);
 678   if (!BT || BT->getKind() != BuiltinType::Int)
 679     return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
 680              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 681
 682   if (Arg->isValueDependent())
 683     return false;
 684
 685   if (!Arg->isIntegerConstantExpr(Result, Context)) {
 686     return Diag(TheCall->getLocStart(), diag::err_object_size_invalid_argument)
 687              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 688   }
 689
 690   if (Result.getSExtValue() < 0 || Result.getSExtValue() > 3) {
 691     return Diag(TheCall->getLocStart(), diag::err_argument_invalid_range)
 692              << "0" << "3" << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 693   }
 694
 695   return false;
 696 }
 697
 698 /// SemaBuiltinLongjmp - Handle __builtin_longjmp(void *env[5], int val).
 699 /// This checks that val is a constant 1.
 700 bool Sema::SemaBuiltinLongjmp(CallExpr *TheCall) {
 701   Expr *Arg = TheCall->getArg(1);
 702   if (Arg->isTypeDependent() || Arg->isValueDependent())
 703     return false;
 704
 705   llvm::APSInt Result(32);
 706   if (!Arg->isIntegerConstantExpr(Result, Context) || Result != 1)
 707     return Diag(TheCall->getLocStart(), diag::err_builtin_longjmp_invalid_val)
 708              << SourceRange(Arg->getLocStart(), Arg->getLocEnd());
 709
 710   return false;
 711 }
 712
 713 // Handle i > 1 ? "x" : "y", recursivelly
 714 bool Sema::SemaCheckStringLiteral(const Expr *E, const CallExpr *TheCall,
 715                                   bool HasVAListArg,
 716                                   unsigned format_idx, unsigned firstDataArg) {
 717   if (E->isTypeDependent() || E->isValueDependent())
 718     return false;
 719
 720   switch (E->getStmtClass()) {
 721   case Stmt::ConditionalOperatorClass: {
 722     const ConditionalOperator *C = cast<ConditionalOperator>(E);
 723     return SemaCheckStringLiteral(C->getLHS(), TheCall,
 724                                   HasVAListArg, format_idx, firstDataArg)
 725         && SemaCheckStringLiteral(C->getRHS(), TheCall,
 726                                   HasVAListArg, format_idx, firstDataArg);
 727   }
 728
 729   case Stmt::ImplicitCastExprClass: {
 730     const ImplicitCastExpr *Expr = cast<ImplicitCastExpr>(E);
 731     return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
 732                                   format_idx, firstDataArg);
 733   }
 734
 735   case Stmt::ParenExprClass: {
 736     const ParenExpr *Expr = cast<ParenExpr>(E);
 737     return SemaCheckStringLiteral(Expr->getSubExpr(), TheCall, HasVAListArg,
 738                                   format_idx, firstDataArg);
 739   }
 740
 741   case Stmt::DeclRefExprClass: {
 742     const DeclRefExpr *DR = cast<DeclRefExpr>(E);
 743
 744     // As an exception, do not flag errors for variables binding to
 745     // const string literals.
 746     if (const VarDecl *VD = dyn_cast<VarDecl>(DR->getDecl())) {
 747       bool isConstant = false;
 748       QualType T = DR->getType();
 749
 750       if (const ArrayType *AT = Context.getAsArrayType(T)) {
 751         isConstant = AT->getElementType().isConstant(Context);
 752       }
 753       else if (const PointerType *PT = T->getAsPointerType()) {
 754         isConstant = T.isConstant(Context) &&
 755                      PT->getPointeeType().isConstant(Context);
 756       }
 757
 758       if (isConstant) {
 759         const VarDecl *Def = 0;
 760         if (const Expr *Init = VD->getDefinition(Def))
 761           return SemaCheckStringLiteral(Init, TheCall,
 762                                         HasVAListArg, format_idx, firstDataArg);
 763       }
 764
 765       // For vprintf* functions (i.e., HasVAListArg==true), we add a
 766       // special check to see if the format string is a function parameter
 767       // of the function calling the printf function.  If the function
 768       // has an attribute indicating it is a printf-like function, then we
 769       // should suppress warnings concerning non-literals being used in a call
 770       // to a vprintf function.  For example:
 771       //
 772       // void
 773       // logmessage(char const *fmt __attribute__ (format (printf, 1, 2)), ...){
 774       //      va_list ap;
 775       //      va_start(ap, fmt);
 776       //      vprintf(fmt, ap);  // Do NOT emit a warning about "fmt".
 777       //      ...
 778       //
 779       //
 780       //  FIXME: We don't have full attribute support yet, so just check to see
 781       //    if the argument is a DeclRefExpr that references a parameter.  We'll
 782       //    add proper support for checking the attribute later.
 783       if (HasVAListArg)
 784         if (isa<ParmVarDecl>(VD))
 785           return true;
 786     }
 787
 788     return false;
 789   }
 790
 791   case Stmt::CallExprClass: {
 792     const CallExpr *CE = cast<CallExpr>(E);
 793     if (const ImplicitCastExpr *ICE
 794           = dyn_cast<ImplicitCastExpr>(CE->getCallee())) {
 795       if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(ICE->getSubExpr())) {
 796         if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(DRE->getDecl())) {
 797           if (const FormatArgAttr *FA = FD->getAttr<FormatArgAttr>()) {
 798             unsigned ArgIndex = FA->getFormatIdx();
 799             const Expr *Arg = CE->getArg(ArgIndex - 1);
 800
 801             return SemaCheckStringLiteral(Arg, TheCall, HasVAListArg,
 802                                           format_idx, firstDataArg);
 803           }
 804         }
 805       }
 806     }
 807
 808     return false;
 809   }
 810   case Stmt::ObjCStringLiteralClass:
 811   case Stmt::StringLiteralClass: {
 812     const StringLiteral *StrE = NULL;
 813
 814     if (const ObjCStringLiteral *ObjCFExpr = dyn_cast<ObjCStringLiteral>(E))
 815       StrE = ObjCFExpr->getString();
 816     else
 817       StrE = cast<StringLiteral>(E);
 818
 819     if (StrE) {
 820       CheckPrintfString(StrE, E, TheCall, HasVAListArg, format_idx,
 821                         firstDataArg);
 822       return true;
 823     }
 824
 825     return false;
 826   }
 827
 828   default:
 829     return false;
 830   }
 831 }
 832
 833 void
 834 Sema::CheckNonNullArguments(const NonNullAttr *NonNull, const CallExpr *TheCall)
 835 {
 836   for (NonNullAttr::iterator i = NonNull->begin(), e = NonNull->end();
 837        i != e; ++i) {
 838     const Expr *ArgExpr = TheCall->getArg(*i);
 839     if (ArgExpr->isNullPointerConstant(Context))
 840       Diag(TheCall->getCallee()->getLocStart(), diag::warn_null_arg)
 841         << ArgExpr->getSourceRange();
 842   }
 843 }
 844
 845 /// CheckPrintfArguments - Check calls to printf (and similar functions) for
 846 /// correct use of format strings.
 847 ///
 848 ///  HasVAListArg - A predicate indicating whether the printf-like
 849 ///    function is passed an explicit va_arg argument (e.g., vprintf)
 850 ///
 851 ///  format_idx - The index into Args for the format string.
 852 ///
 853 /// Improper format strings to functions in the printf family can be
 854 /// the source of bizarre bugs and very serious security holes.  A
 855 /// good source of information is available in the following paper
 856 /// (which includes additional references):
 857 ///
 858 ///  FormatGuard: Automatic Protection From printf Format String
 859 ///  Vulnerabilities, Proceedings of the 10th USENIX Security Symposium, 2001.
 860 ///
 861 /// Functionality implemented:
 862 ///
 863 ///  We can statically check the following properties for string
 864 ///  literal format strings for non v.*printf functions (where the
 865 ///  arguments are passed directly):
 866 //
 867 ///  (1) Are the number of format conversions equal to the number of
 868 ///      data arguments?
 869 ///
 870 ///  (2) Does each format conversion correctly match the type of the
 871 ///      corresponding data argument?  (TODO)
 872 ///
 873 /// Moreover, for all printf functions we can:
 874 ///
 875 ///  (3) Check for a missing format string (when not caught by type checking).
 876 ///
 877 ///  (4) Check for no-operation flags; e.g. using "#" with format
 878 ///      conversion 'c'  (TODO)
 879 ///
 880 ///  (5) Check the use of '%n', a major source of security holes.
 881 ///
 882 ///  (6) Check for malformed format conversions that don't specify anything.
 883 ///
 884 ///  (7) Check for empty format strings.  e.g: printf("");
 885 ///
 886 ///  (8) Check that the format string is a wide literal.
 887 ///
 888 ///  (9) Also check the arguments of functions with the __format__ attribute.
 889 ///      (TODO).
 890 ///
 891 /// All of these checks can be done by parsing the format string.
 892 ///
 893 /// For now, we ONLY do (1), (3), (5), (6), (7), and (8).
 894 void
 895 Sema::CheckPrintfArguments(const CallExpr *TheCall, bool HasVAListArg,
 896                            unsigned format_idx, unsigned firstDataArg) {
 897   const Expr *Fn = TheCall->getCallee();
 898
 899   // CHECK: printf-like function is called with no format string.
 900   if (format_idx >= TheCall->getNumArgs()) {
 901     Diag(TheCall->getRParenLoc(), diag::warn_printf_missing_format_string)
 902       << Fn->getSourceRange();
 903     return;
 904   }
 905
 906   const Expr *OrigFormatExpr = TheCall->getArg(format_idx)->IgnoreParenCasts();
 907
 908   // CHECK: format string is not a string literal.
 909   //
 910   // Dynamically generated format strings are difficult to
 911   // automatically vet at compile time.  Requiring that format strings
 912   // are string literals: (1) permits the checking of format strings by
 913   // the compiler and thereby (2) can practically remove the source of
 914   // many format string exploits.
 915
 916   // Format string can be either ObjC string (e.g. @"%d") or
 917   // C string (e.g. "%d")
 918   // ObjC string uses the same format specifiers as C string, so we can use
 919   // the same format string checking logic for both ObjC and C strings.
 920   if (SemaCheckStringLiteral(OrigFormatExpr, TheCall, HasVAListArg, format_idx,
 921                              firstDataArg))
 922     return;  // Literal format string found, check done!
 923
 924   // If there are no arguments specified, warn with -Wformat-security, otherwise
 925   // warn only with -Wformat-nonliteral.
 926   if (TheCall->getNumArgs() == format_idx+1)
 927     Diag(TheCall->getArg(format_idx)->getLocStart(),
 928          diag::warn_printf_nonliteral_noargs)
 929       << OrigFormatExpr->getSourceRange();
 930   else
 931     Diag(TheCall->getArg(format_idx)->getLocStart(),
 932          diag::warn_printf_nonliteral)
 933            << OrigFormatExpr->getSourceRange();
 934 }
 935
 936 void Sema::CheckPrintfString(const StringLiteral *FExpr,
 937                              const Expr *OrigFormatExpr,
 938                              const CallExpr *TheCall, bool HasVAListArg,
 939                              unsigned format_idx, unsigned firstDataArg) {
 940
 941   const ObjCStringLiteral *ObjCFExpr =
 942     dyn_cast<ObjCStringLiteral>(OrigFormatExpr);
 943
 944   // CHECK: is the format string a wide literal?
 945   if (FExpr->isWide()) {
 946     Diag(FExpr->getLocStart(),
 947          diag::warn_printf_format_string_is_wide_literal)
 948       << OrigFormatExpr->getSourceRange();
 949     return;
 950   }
 951
 952   // Str - The format string.  NOTE: this is NOT null-terminated!
 953   const char *Str = FExpr->getStrData();
 954
 955   // CHECK: empty format string?
 956   unsigned StrLen = FExpr->getByteLength();
 957
 958   if (StrLen == 0) {
 959     Diag(FExpr->getLocStart(), diag::warn_printf_empty_format_string)
 960       << OrigFormatExpr->getSourceRange();
 961     return;
 962   }
 963
 964   // We process the format string using a binary state machine.  The
 965   // current state is stored in CurrentState.
 966   enum {
 967     state_OrdChr,
 968     state_Conversion
 969   } CurrentState = state_OrdChr;
 970
 971   // numConversions - The number of conversions seen so far.  This is
 972   //  incremented as we traverse the format string.
 973   unsigned numConversions = 0;
 974
 975   // numDataArgs - The number of data arguments after the format
 976   //  string.  This can only be determined for non vprintf-like
 977   //  functions.  For those functions, this value is 1 (the sole
 978   //  va_arg argument).
 979   unsigned numDataArgs = TheCall->getNumArgs()-firstDataArg;
 980
 981   // Inspect the format string.
 982   unsigned StrIdx = 0;
 983
 984   // LastConversionIdx - Index within the format string where we last saw
 985   //  a '%' character that starts a new format conversion.
 986   unsigned LastConversionIdx = 0;
 987
 988   for (; StrIdx < StrLen; ++StrIdx) {
 989
 990     // Is the number of detected conversion conversions greater than
 991     // the number of matching data arguments?  If so, stop.
 992     if (!HasVAListArg && numConversions > numDataArgs) break;
 993
 994     // Handle "\0"
 995     if (Str[StrIdx] == '\0') {
 996       // The string returned by getStrData() is not null-terminated,
 997       // so the presence of a null character is likely an error.
 998       Diag(getLocationOfStringLiteralByte(FExpr, StrIdx),
 999            diag::warn_printf_format_string_contains_null_char)
1000         <<  OrigFormatExpr->getSourceRange();
1001       return;
1002     }
1003
1004     // Ordinary characters (not processing a format conversion).
1005     if (CurrentState == state_OrdChr) {
1006       if (Str[StrIdx] == '%') {
1007         CurrentState = state_Conversion;
1008         LastConversionIdx = StrIdx;
1009       }
1010       continue;
1011     }
1012
1013     // Seen '%'.  Now processing a format conversion.
1014     switch (Str[StrIdx]) {
1015     // Handle dynamic precision or width specifier.
1016     case '*': {
1017       ++numConversions;
1018
1019       if (!HasVAListArg) {
1020         if (numConversions > numDataArgs) {
1021           SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
1022
1023           if (Str[StrIdx-1] == '.')
1024             Diag(Loc, diag::warn_printf_asterisk_precision_missing_arg)
1025               << OrigFormatExpr->getSourceRange();
1026           else
1027             Diag(Loc, diag::warn_printf_asterisk_width_missing_arg)
1028               << OrigFormatExpr->getSourceRange();
1029
1030           // Don't do any more checking.  We'll just emit spurious errors.
1031           return;
1032         }
1033
1034         // Perform type checking on width/precision specifier.
1035         const Expr *E = TheCall->getArg(format_idx+numConversions);
1036         if (const BuiltinType *BT = E->getType()->getAsBuiltinType())
1037           if (BT->getKind() == BuiltinType::Int)
1038             break;
1039
1040         SourceLocation Loc = getLocationOfStringLiteralByte(FExpr, StrIdx);
1041
1042         if (Str[StrIdx-1] == '.')
1043           Diag(Loc, diag::warn_printf_asterisk_precision_wrong_type)
1044           << E->getType() << E->getSourceRange();
1045         else
1046           Diag(Loc, diag::warn_printf_asterisk_width_wrong_type)
1047           << E->getType() << E->getSourceRange();
1048
1049         break;
1050       }
1051     }
1052
1053     // Characters which can terminate a format conversion
1054     // (e.g. "%d").  Characters that specify length modifiers or
1055     // other flags are handled by the default case below.
1056     //
1057     // FIXME: additional checks will go into the following cases.
1058     case 'i':
1059     case 'd':
1060     case 'o':
1061     case 'u':
1062     case 'x':
1063     case 'X':
1064     case 'D':
1065     case 'O':
1066     case 'U':
1067     case 'e':
1068     case 'E':
1069     case 'f':
1070     case 'F':
1071     case 'g':
1072     case 'G':
1073     case 'a':
1074     case 'A':
1075     case 'c':
1076     case 'C':
1077     case 'S':
1078     case 's':
1079     case 'p':
1080       ++numConversions;
1081       CurrentState = state_OrdChr;
1082       break;
1083
1084     case 'm':
1085       // FIXME: Warn in situations where this isn't supported!
1086       CurrentState = state_OrdChr;
1087       break;
1088
1089     // CHECK: Are we using "%n"?  Issue a warning.
1090     case 'n': {
1091       ++numConversions;
1092       CurrentState = state_OrdChr;
1093       SourceLocation Loc = getLocationOfStringLiteralByte(FExpr,
1094                                                           LastConversionIdx);
1095
1096       Diag(Loc, diag::warn_printf_write_back)<<OrigFormatExpr->getSourceRange();
1097       break;
1098     }
1099
1100     // Handle "%@"
1101     case '@':
1102       // %@ is allowed in ObjC format strings only.
1103       if(ObjCFExpr != NULL)
1104         CurrentState = state_OrdChr;
1105       else {
1106         // Issue a warning: invalid format conversion.
1107         SourceLocation Loc =
1108           getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1109
1110         Diag(Loc, diag::warn_printf_invalid_conversion)
1111           <<  std::string(Str+LastConversionIdx,
1112                           Str+std::min(LastConversionIdx+2, StrLen))
1113           << OrigFormatExpr->getSourceRange();
1114       }
1115       ++numConversions;
1116       break;
1117
1118     // Handle "%%"
1119     case '%':
1120       // Sanity check: Was the first "%" character the previous one?
1121       // If not, we will assume that we have a malformed format
1122       // conversion, and that the current "%" character is the start
1123       // of a new conversion.
1124       if (StrIdx - LastConversionIdx == 1)
1125         CurrentState = state_OrdChr;
1126       else {
1127         // Issue a warning: invalid format conversion.
1128         SourceLocation Loc =
1129           getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1130
1131         Diag(Loc, diag::warn_printf_invalid_conversion)
1132           << std::string(Str+LastConversionIdx, Str+StrIdx)
1133           << OrigFormatExpr->getSourceRange();
1134
1135         // This conversion is broken.  Advance to the next format
1136         // conversion.
1137         LastConversionIdx = StrIdx;
1138         ++numConversions;
1139       }
1140       break;
1141
1142     default:
1143       // This case catches all other characters: flags, widths, etc.
1144       // We should eventually process those as well.
1145       break;
1146     }
1147   }
1148
1149   if (CurrentState == state_Conversion) {
1150     // Issue a warning: invalid format conversion.
1151     SourceLocation Loc =
1152       getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1153
1154     Diag(Loc, diag::warn_printf_invalid_conversion)
1155       << std::string(Str+LastConversionIdx,
1156                      Str+std::min(LastConversionIdx+2, StrLen))
1157       << OrigFormatExpr->getSourceRange();
1158     return;
1159   }
1160
1161   if (!HasVAListArg) {
1162     // CHECK: Does the number of format conversions exceed the number
1163     //        of data arguments?
1164     if (numConversions > numDataArgs) {
1165       SourceLocation Loc =
1166         getLocationOfStringLiteralByte(FExpr, LastConversionIdx);
1167
1168       Diag(Loc, diag::warn_printf_insufficient_data_args)
1169         << OrigFormatExpr->getSourceRange();
1170     }
1171     // CHECK: Does the number of data arguments exceed the number of
1172     //        format conversions in the format string?
1173     else if (numConversions < numDataArgs)
1174       Diag(TheCall->getArg(format_idx+numConversions+1)->getLocStart(),
1175            diag::warn_printf_too_many_data_args)
1176         << OrigFormatExpr->getSourceRange();
1177   }
1178 }
1179
1180 //===--- CHECK: Return Address of Stack Variable --------------------------===//
1181
1182 static DeclRefExpr* EvalVal(Expr *E);
1183 static DeclRefExpr* EvalAddr(Expr* E);
1184
1185 /// CheckReturnStackAddr - Check if a return statement returns the address
1186 ///   of a stack variable.
1187 void
1188 Sema::CheckReturnStackAddr(Expr *RetValExp, QualType lhsType,
1189                            SourceLocation ReturnLoc) {
1190
1191   // Perform checking for returned stack addresses.
1192   if (lhsType->isPointerType() || lhsType->isBlockPointerType()) {
1193     if (DeclRefExpr *DR = EvalAddr(RetValExp))
1194       Diag(DR->getLocStart(), diag::warn_ret_stack_addr)
1195        << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
1196
1197     // Skip over implicit cast expressions when checking for block expressions.
1198     if (ImplicitCastExpr *IcExpr =
1199           dyn_cast_or_null<ImplicitCastExpr>(RetValExp))
1200       RetValExp = IcExpr->getSubExpr();
1201
1202     if (BlockExpr *C = dyn_cast_or_null<BlockExpr>(RetValExp))
1203       if (C->hasBlockDeclRefExprs())
1204         Diag(C->getLocStart(), diag::err_ret_local_block)
1205           << C->getSourceRange();
1206   }
1207   // Perform checking for stack values returned by reference.
1208   else if (lhsType->isReferenceType()) {
1209     // Check for a reference to the stack
1210     if (DeclRefExpr *DR = EvalVal(RetValExp))
1211       Diag(DR->getLocStart(), diag::warn_ret_stack_ref)
1212         << DR->getDecl()->getDeclName() << RetValExp->getSourceRange();
1213   }
1214 }
1215
1216 /// EvalAddr - EvalAddr and EvalVal are mutually recursive functions that
1217 ///  check if the expression in a return statement evaluates to an address
1218 ///  to a location on the stack.  The recursion is used to traverse the
1219 ///  AST of the return expression, with recursion backtracking when we
1220 ///  encounter a subexpression that (1) clearly does not lead to the address
1221 ///  of a stack variable or (2) is something we cannot determine leads to
1222 ///  the address of a stack variable based on such local checking.
1223 ///
1224 ///  EvalAddr processes expressions that are pointers that are used as
1225 ///  references (and not L-values).  EvalVal handles all other values.
1226 ///  At the base case of the recursion is a check for a DeclRefExpr* in
1227 ///  the refers to a stack variable.
1228 ///
1229 ///  This implementation handles:
1230 ///
1231 ///   * pointer-to-pointer casts
1232 ///   * implicit conversions from array references to pointers
1233 ///   * taking the address of fields
1234 ///   * arbitrary interplay between "&" and "*" operators
1235 ///   * pointer arithmetic from an address of a stack variable
1236 ///   * taking the address of an array element where the array is on the stack
1237 static DeclRefExpr* EvalAddr(Expr *E) {
1238   // We should only be called for evaluating pointer expressions.
1239   assert((E->getType()->isPointerType() ||
1240           E->getType()->isBlockPointerType() ||
1241           E->getType()->isObjCQualifiedIdType()) &&
1242          "EvalAddr only works on pointers");
1243
1244   // Our "symbolic interpreter" is just a dispatch off the currently
1245   // viewed AST node.  We then recursively traverse the AST by calling
1246   // EvalAddr and EvalVal appropriately.
1247   switch (E->getStmtClass()) {
1248   case Stmt::ParenExprClass:
1249     // Ignore parentheses.
1250     return EvalAddr(cast<ParenExpr>(E)->getSubExpr());
1251
1252   case Stmt::UnaryOperatorClass: {
1253     // The only unary operator that make sense to handle here
1254     // is AddrOf.  All others don't make sense as pointers.
1255     UnaryOperator *U = cast<UnaryOperator>(E);
1256
1257     if (U->getOpcode() == UnaryOperator::AddrOf)
1258       return EvalVal(U->getSubExpr());
1259     else
1260       return NULL;
1261   }
1262
1263   case Stmt::BinaryOperatorClass: {
1264     // Handle pointer arithmetic.  All other binary operators are not valid
1265     // in this context.
1266     BinaryOperator *B = cast<BinaryOperator>(E);
1267     BinaryOperator::Opcode op = B->getOpcode();
1268
1269     if (op != BinaryOperator::Add && op != BinaryOperator::Sub)
1270       return NULL;
1271
1272     Expr *Base = B->getLHS();
1273
1274     // Determine which argument is the real pointer base.  It could be
1275     // the RHS argument instead of the LHS.
1276     if (!Base->getType()->isPointerType()) Base = B->getRHS();
1277
1278     assert (Base->getType()->isPointerType());
1279     return EvalAddr(Base);
1280   }
1281
1282   // For conditional operators we need to see if either the LHS or RHS are
1283   // valid DeclRefExpr*s.  If one of them is valid, we return it.
1284   case Stmt::ConditionalOperatorClass: {
1285     ConditionalOperator *C = cast<ConditionalOperator>(E);
1286
1287     // Handle the GNU extension for missing LHS.
1288     if (Expr *lhsExpr = C->getLHS())
1289       if (DeclRefExpr* LHS = EvalAddr(lhsExpr))
1290         return LHS;
1291
1292      return EvalAddr(C->getRHS());
1293   }
1294
1295   // For casts, we need to handle conversions from arrays to
1296   // pointer values, and pointer-to-pointer conversions.
1297   case Stmt::ImplicitCastExprClass:
1298   case Stmt::CStyleCastExprClass:
1299   case Stmt::CXXFunctionalCastExprClass: {
1300     Expr* SubExpr = cast<CastExpr>(E)->getSubExpr();
1301     QualType T = SubExpr->getType();
1302
1303     if (SubExpr->getType()->isPointerType() ||
1304         SubExpr->getType()->isBlockPointerType() ||
1305         SubExpr->getType()->isObjCQualifiedIdType())
1306       return EvalAddr(SubExpr);
1307     else if (T->isArrayType())
1308       return EvalVal(SubExpr);
1309     else
1310       return 0;
1311   }
1312
1313   // C++ casts.  For dynamic casts, static casts, and const casts, we
1314   // are always converting from a pointer-to-pointer, so we just blow
1315   // through the cast.  In the case the dynamic cast doesn't fail (and
1316   // return NULL), we take the conservative route and report cases
1317   // where we return the address of a stack variable.  For Reinterpre
1318   // FIXME: The comment about is wrong; we're not always converting
1319   // from pointer to pointer. I'm guessing that this code should also
1320   // handle references to objects.
1321   case Stmt::CXXStaticCastExprClass:
1322   case Stmt::CXXDynamicCastExprClass:
1323   case Stmt::CXXConstCastExprClass:
1324   case Stmt::CXXReinterpretCastExprClass: {
1325       Expr *S = cast<CXXNamedCastExpr>(E)->getSubExpr();
1326       if (S->getType()->isPointerType() || S->getType()->isBlockPointerType())
1327         return EvalAddr(S);
1328       else
1329         return NULL;
1330   }
1331
1332   // Everything else: we simply don't reason about them.
1333   default:
1334     return NULL;
1335   }
1336 }
1337
1338
1339 ///  EvalVal - This function is complements EvalAddr in the mutual recursion.
1340 ///   See the comments for EvalAddr for more details.
1341 static DeclRefExpr* EvalVal(Expr *E) {
1342
1343   // We should only be called for evaluating non-pointer expressions, or
1344   // expressions with a pointer type that are not used as references but instead
1345   // are l-values (e.g., DeclRefExpr with a pointer type).
1346
1347   // Our "symbolic interpreter" is just a dispatch off the currently
1348   // viewed AST node.  We then recursively traverse the AST by calling
1349   // EvalAddr and EvalVal appropriately.
1350   switch (E->getStmtClass()) {
1351   case Stmt::DeclRefExprClass:
1352   case Stmt::QualifiedDeclRefExprClass: {
1353     // DeclRefExpr: the base case.  When we hit a DeclRefExpr we are looking
1354     //  at code that refers to a variable's name.  We check if it has local
1355     //  storage within the function, and if so, return the expression.
1356     DeclRefExpr *DR = cast<DeclRefExpr>(E);
1357
1358     if (VarDecl *V = dyn_cast<VarDecl>(DR->getDecl()))
1359       if(V->hasLocalStorage() && !V->getType()->isReferenceType()) return DR;
1360
1361     return NULL;
1362   }
1363
1364   case Stmt::ParenExprClass:
1365     // Ignore parentheses.
1366     return EvalVal(cast<ParenExpr>(E)->getSubExpr());
1367
1368   case Stmt::UnaryOperatorClass: {
1369     // The only unary operator that make sense to handle here
1370     // is Deref.  All others don't resolve to a "name."  This includes
1371     // handling all sorts of rvalues passed to a unary operator.
1372     UnaryOperator *U = cast<UnaryOperator>(E);
1373
1374     if (U->getOpcode() == UnaryOperator::Deref)
1375       return EvalAddr(U->getSubExpr());
1376
1377     return NULL;
1378   }
1379
1380   case Stmt::ArraySubscriptExprClass: {
1381     // Array subscripts are potential references to data on the stack.  We
1382     // retrieve the DeclRefExpr* for the array variable if it indeed
1383     // has local storage.
1384     return EvalAddr(cast<ArraySubscriptExpr>(E)->getBase());
1385   }
1386
1387   case Stmt::ConditionalOperatorClass: {
1388     // For conditional operators we need to see if either the LHS or RHS are
1389     // non-NULL DeclRefExpr's.  If one is non-NULL, we return it.
1390     ConditionalOperator *C = cast<ConditionalOperator>(E);
1391
1392     // Handle the GNU extension for missing LHS.
1393     if (Expr *lhsExpr = C->getLHS())
1394       if (DeclRefExpr *LHS = EvalVal(lhsExpr))
1395         return LHS;
1396
1397     return EvalVal(C->getRHS());
1398   }
1399
1400   // Accesses to members are potential references to data on the stack.
1401   case Stmt::MemberExprClass: {
1402     MemberExpr *M = cast<MemberExpr>(E);
1403
1404     // Check for indirect access.  We only want direct field accesses.
1405     if (!M->isArrow())
1406       return EvalVal(M->getBase());
1407     else
1408       return NULL;
1409   }
1410
1411   // Everything else: we simply don't reason about them.
1412   default:
1413     return NULL;
1414   }
1415 }
1416
1417 //===--- CHECK: Floating-Point comparisons (-Wfloat-equal) ---------------===//
1418
1419 /// Check for comparisons of floating point operands using != and ==.
1420 /// Issue a warning if these are no self-comparisons, as they are not likely
1421 /// to do what the programmer intended.
1422 void Sema::CheckFloatComparison(SourceLocation loc, Expr* lex, Expr *rex) {
1423   bool EmitWarning = true;
1424
1425   Expr* LeftExprSansParen = lex->IgnoreParens();
1426   Expr* RightExprSansParen = rex->IgnoreParens();
1427
1428   // Special case: check for x == x (which is OK).
1429   // Do not emit warnings for such cases.
1430   if (DeclRefExpr* DRL = dyn_cast<DeclRefExpr>(LeftExprSansParen))
1431     if (DeclRefExpr* DRR = dyn_cast<DeclRefExpr>(RightExprSansParen))
1432       if (DRL->getDecl() == DRR->getDecl())
1433         EmitWarning = false;
1434
1435
1436   // Special case: check for comparisons against literals that can be exactly
1437   //  represented by APFloat.  In such cases, do not emit a warning.  This
1438   //  is a heuristic: often comparison against such literals are used to
1439   //  detect if a value in a variable has not changed.  This clearly can
1440   //  lead to false negatives.
1441   if (EmitWarning) {
1442     if (FloatingLiteral* FLL = dyn_cast<FloatingLiteral>(LeftExprSansParen)) {
1443       if (FLL->isExact())
1444         EmitWarning = false;
1445     }
1446     else
1447       if (FloatingLiteral* FLR = dyn_cast<FloatingLiteral>(RightExprSansParen)){
1448         if (FLR->isExact())
1449           EmitWarning = false;
1450     }
1451   }
1452
1453   // Check for comparisons with builtin types.
1454   if (EmitWarning)
1455     if (CallExpr* CL = dyn_cast<CallExpr>(LeftExprSansParen))
1456       if (CL->isBuiltinCall(Context))
1457         EmitWarning = false;
1458
1459   if (EmitWarning)
1460     if (CallExpr* CR = dyn_cast<CallExpr>(RightExprSansParen))
1461       if (CR->isBuiltinCall(Context))
1462         EmitWarning = false;
1463
1464   // Emit the diagnostic.
1465   if (EmitWarning)
1466     Diag(loc, diag::warn_floatingpoint_eq)
1467       << lex->getSourceRange() << rex->getSourceRange();
1468 }