1 //= CStringChecker.h - Checks calls to C string functions ----------*- C++ -*-//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
13 //===----------------------------------------------------------------------===//
15 #include "ExprEngineExperimentalChecks.h"
16 #include "clang/StaticAnalyzer/BugReporter/BugType.h"
17 #include "clang/StaticAnalyzer/PathSensitive/CheckerVisitor.h"
18 #include "clang/StaticAnalyzer/PathSensitive/GRStateTrait.h"
19 #include "llvm/ADT/StringSwitch.h"
21 using namespace clang
;
25 class CStringChecker
: public CheckerVisitor
<CStringChecker
> {
26 BugType
*BT_Null
, *BT_Bounds
, *BT_BoundsWrite
, *BT_Overlap
, *BT_NotCString
;
29 : BT_Null(0), BT_Bounds(0), BT_BoundsWrite(0), BT_Overlap(0), BT_NotCString(0)
31 static void *getTag() { static int tag
; return &tag
; }
33 bool evalCallExpr(CheckerContext
&C
, const CallExpr
*CE
);
34 void PreVisitDeclStmt(CheckerContext
&C
, const DeclStmt
*DS
);
35 void MarkLiveSymbols(const GRState
*state
, SymbolReaper
&SR
);
36 void evalDeadSymbols(CheckerContext
&C
, SymbolReaper
&SR
);
37 bool WantsRegionChangeUpdate(const GRState
*state
);
39 const GRState
*EvalRegionChanges(const GRState
*state
,
40 const MemRegion
* const *Begin
,
41 const MemRegion
* const *End
,
44 typedef void (CStringChecker::*FnCheck
)(CheckerContext
&, const CallExpr
*);
46 void evalMemcpy(CheckerContext
&C
, const CallExpr
*CE
);
47 void evalMemmove(CheckerContext
&C
, const CallExpr
*CE
);
48 void evalBcopy(CheckerContext
&C
, const CallExpr
*CE
);
49 void evalCopyCommon(CheckerContext
&C
, const GRState
*state
,
50 const Expr
*Size
, const Expr
*Source
, const Expr
*Dest
,
51 bool Restricted
= false);
53 void evalMemcmp(CheckerContext
&C
, const CallExpr
*CE
);
55 void evalstrLength(CheckerContext
&C
, const CallExpr
*CE
);
57 void evalStrcpy(CheckerContext
&C
, const CallExpr
*CE
);
58 void evalStpcpy(CheckerContext
&C
, const CallExpr
*CE
);
59 void evalStrcpyCommon(CheckerContext
&C
, const CallExpr
*CE
, bool returnEnd
);
62 std::pair
<const GRState
*, const GRState
*>
63 assumeZero(CheckerContext
&C
, const GRState
*state
, SVal V
, QualType Ty
);
65 const GRState
*setCStringLength(const GRState
*state
, const MemRegion
*MR
,
67 SVal
getCStringLengthForRegion(CheckerContext
&C
, const GRState
*&state
,
68 const Expr
*Ex
, const MemRegion
*MR
);
69 SVal
getCStringLength(CheckerContext
&C
, const GRState
*&state
,
70 const Expr
*Ex
, SVal Buf
);
72 const GRState
*InvalidateBuffer(CheckerContext
&C
, const GRState
*state
,
73 const Expr
*Ex
, SVal V
);
75 bool SummarizeRegion(llvm::raw_ostream
& os
, ASTContext
& Ctx
,
79 const GRState
*checkNonNull(CheckerContext
&C
, const GRState
*state
,
80 const Expr
*S
, SVal l
);
81 const GRState
*CheckLocation(CheckerContext
&C
, const GRState
*state
,
82 const Expr
*S
, SVal l
,
83 bool IsDestination
= false);
84 const GRState
*CheckBufferAccess(CheckerContext
&C
, const GRState
*state
,
87 const Expr
*SecondBuf
= NULL
,
88 bool FirstIsDestination
= false);
89 const GRState
*CheckOverlap(CheckerContext
&C
, const GRState
*state
,
90 const Expr
*Size
, const Expr
*First
,
92 void emitOverlapBug(CheckerContext
&C
, const GRState
*state
,
93 const Stmt
*First
, const Stmt
*Second
);
98 typedef llvm::ImmutableMap
<const MemRegion
*, SVal
> EntryMap
;
100 } //end anonymous namespace
105 struct GRStateTrait
<CStringLength
>
106 : public GRStatePartialTrait
<CStringLength::EntryMap
> {
107 static void *GDMIndex() { return CStringChecker::getTag(); }
112 void ento::RegisterCStringChecker(ExprEngine
&Eng
) {
113 Eng
.registerCheck(new CStringChecker());
116 //===----------------------------------------------------------------------===//
117 // Individual checks and utility methods.
118 //===----------------------------------------------------------------------===//
120 std::pair
<const GRState
*, const GRState
*>
121 CStringChecker::assumeZero(CheckerContext
&C
, const GRState
*state
, SVal V
,
123 DefinedSVal
*val
= dyn_cast
<DefinedSVal
>(&V
);
125 return std::pair
<const GRState
*, const GRState
*>(state
, state
);
127 SValBuilder
&svalBuilder
= C
.getSValBuilder();
128 DefinedOrUnknownSVal zero
= svalBuilder
.makeZeroVal(Ty
);
129 return state
->assume(svalBuilder
.evalEQ(state
, *val
, zero
));
132 const GRState
*CStringChecker::checkNonNull(CheckerContext
&C
,
133 const GRState
*state
,
134 const Expr
*S
, SVal l
) {
135 // If a previous check has failed, propagate the failure.
139 const GRState
*stateNull
, *stateNonNull
;
140 llvm::tie(stateNull
, stateNonNull
) = assumeZero(C
, state
, l
, S
->getType());
142 if (stateNull
&& !stateNonNull
) {
143 ExplodedNode
*N
= C
.generateSink(stateNull
);
148 BT_Null
= new BuiltinBug("API",
149 "Null pointer argument in call to byte string function");
151 // Generate a report for this bug.
152 BuiltinBug
*BT
= static_cast<BuiltinBug
*>(BT_Null
);
153 EnhancedBugReport
*report
= new EnhancedBugReport(*BT
,
154 BT
->getDescription(), N
);
156 report
->addRange(S
->getSourceRange());
157 report
->addVisitorCreator(bugreporter::registerTrackNullOrUndefValue
, S
);
158 C
.EmitReport(report
);
162 // From here on, assume that the value is non-null.
163 assert(stateNonNull
);
167 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
168 const GRState
*CStringChecker::CheckLocation(CheckerContext
&C
,
169 const GRState
*state
,
170 const Expr
*S
, SVal l
,
171 bool IsDestination
) {
172 // If a previous check has failed, propagate the failure.
176 // Check for out of bound array element access.
177 const MemRegion
*R
= l
.getAsRegion();
181 const ElementRegion
*ER
= dyn_cast
<ElementRegion
>(R
);
185 assert(ER
->getValueType() == C
.getASTContext().CharTy
&&
186 "CheckLocation should only be called with char* ElementRegions");
188 // Get the size of the array.
189 const SubRegion
*superReg
= cast
<SubRegion
>(ER
->getSuperRegion());
190 SValBuilder
&svalBuilder
= C
.getSValBuilder();
191 SVal Extent
= svalBuilder
.convertToArrayIndex(superReg
->getExtent(svalBuilder
));
192 DefinedOrUnknownSVal Size
= cast
<DefinedOrUnknownSVal
>(Extent
);
194 // Get the index of the accessed element.
195 DefinedOrUnknownSVal Idx
= cast
<DefinedOrUnknownSVal
>(ER
->getIndex());
197 const GRState
*StInBound
= state
->assumeInBound(Idx
, Size
, true);
198 const GRState
*StOutBound
= state
->assumeInBound(Idx
, Size
, false);
199 if (StOutBound
&& !StInBound
) {
200 ExplodedNode
*N
= C
.generateSink(StOutBound
);
206 if (!BT_BoundsWrite
) {
207 BT_BoundsWrite
= new BuiltinBug("Out-of-bound array access",
208 "Byte string function overflows destination buffer");
210 BT
= static_cast<BuiltinBug
*>(BT_BoundsWrite
);
213 BT_Bounds
= new BuiltinBug("Out-of-bound array access",
214 "Byte string function accesses out-of-bound array element");
216 BT
= static_cast<BuiltinBug
*>(BT_Bounds
);
219 // FIXME: It would be nice to eventually make this diagnostic more clear,
220 // e.g., by referencing the original declaration or by saying *why* this
221 // reference is outside the range.
223 // Generate a report for this bug.
224 RangedBugReport
*report
= new RangedBugReport(*BT
, BT
->getDescription(), N
);
226 report
->addRange(S
->getSourceRange());
227 C
.EmitReport(report
);
231 // Array bound check succeeded. From this point forward the array bound
232 // should always succeed.
236 const GRState
*CStringChecker::CheckBufferAccess(CheckerContext
&C
,
237 const GRState
*state
,
239 const Expr
*FirstBuf
,
240 const Expr
*SecondBuf
,
241 bool FirstIsDestination
) {
242 // If a previous check has failed, propagate the failure.
246 SValBuilder
&svalBuilder
= C
.getSValBuilder();
247 ASTContext
&Ctx
= C
.getASTContext();
249 QualType sizeTy
= Size
->getType();
250 QualType PtrTy
= Ctx
.getPointerType(Ctx
.CharTy
);
252 // Check that the first buffer is non-null.
253 SVal BufVal
= state
->getSVal(FirstBuf
);
254 state
= checkNonNull(C
, state
, FirstBuf
, BufVal
);
258 // Get the access length and make sure it is known.
259 SVal LengthVal
= state
->getSVal(Size
);
260 NonLoc
*Length
= dyn_cast
<NonLoc
>(&LengthVal
);
264 // Compute the offset of the last element to be accessed: size-1.
265 NonLoc One
= cast
<NonLoc
>(svalBuilder
.makeIntVal(1, sizeTy
));
266 NonLoc LastOffset
= cast
<NonLoc
>(svalBuilder
.evalBinOpNN(state
, BO_Sub
,
267 *Length
, One
, sizeTy
));
269 // Check that the first buffer is sufficiently long.
270 SVal BufStart
= svalBuilder
.evalCast(BufVal
, PtrTy
, FirstBuf
->getType());
271 if (Loc
*BufLoc
= dyn_cast
<Loc
>(&BufStart
)) {
272 SVal BufEnd
= svalBuilder
.evalBinOpLN(state
, BO_Add
, *BufLoc
,
274 state
= CheckLocation(C
, state
, FirstBuf
, BufEnd
, FirstIsDestination
);
276 // If the buffer isn't large enough, abort.
281 // If there's a second buffer, check it as well.
283 BufVal
= state
->getSVal(SecondBuf
);
284 state
= checkNonNull(C
, state
, SecondBuf
, BufVal
);
288 BufStart
= svalBuilder
.evalCast(BufVal
, PtrTy
, SecondBuf
->getType());
289 if (Loc
*BufLoc
= dyn_cast
<Loc
>(&BufStart
)) {
290 SVal BufEnd
= svalBuilder
.evalBinOpLN(state
, BO_Add
, *BufLoc
,
292 state
= CheckLocation(C
, state
, SecondBuf
, BufEnd
);
296 // Large enough or not, return this state!
300 const GRState
*CStringChecker::CheckOverlap(CheckerContext
&C
,
301 const GRState
*state
,
304 const Expr
*Second
) {
305 // Do a simple check for overlap: if the two arguments are from the same
306 // buffer, see if the end of the first is greater than the start of the second
309 // If a previous check has failed, propagate the failure.
313 const GRState
*stateTrue
, *stateFalse
;
315 // Get the buffer values and make sure they're known locations.
316 SVal firstVal
= state
->getSVal(First
);
317 SVal secondVal
= state
->getSVal(Second
);
319 Loc
*firstLoc
= dyn_cast
<Loc
>(&firstVal
);
323 Loc
*secondLoc
= dyn_cast
<Loc
>(&secondVal
);
327 // Are the two values the same?
328 SValBuilder
&svalBuilder
= C
.getSValBuilder();
329 llvm::tie(stateTrue
, stateFalse
) =
330 state
->assume(svalBuilder
.evalEQ(state
, *firstLoc
, *secondLoc
));
332 if (stateTrue
&& !stateFalse
) {
333 // If the values are known to be equal, that's automatically an overlap.
334 emitOverlapBug(C
, stateTrue
, First
, Second
);
338 // assume the two expressions are not equal.
342 // Which value comes first?
343 ASTContext
&Ctx
= svalBuilder
.getContext();
344 QualType cmpTy
= Ctx
.IntTy
;
345 SVal reverse
= svalBuilder
.evalBinOpLL(state
, BO_GT
,
346 *firstLoc
, *secondLoc
, cmpTy
);
347 DefinedOrUnknownSVal
*reverseTest
= dyn_cast
<DefinedOrUnknownSVal
>(&reverse
);
351 llvm::tie(stateTrue
, stateFalse
) = state
->assume(*reverseTest
);
354 // If we don't know which one comes first, we can't perform this test.
357 // Switch the values so that firstVal is before secondVal.
358 Loc
*tmpLoc
= firstLoc
;
359 firstLoc
= secondLoc
;
362 // Switch the Exprs as well, so that they still correspond.
363 const Expr
*tmpExpr
= First
;
369 // Get the length, and make sure it too is known.
370 SVal LengthVal
= state
->getSVal(Size
);
371 NonLoc
*Length
= dyn_cast
<NonLoc
>(&LengthVal
);
375 // Convert the first buffer's start address to char*.
376 // Bail out if the cast fails.
377 QualType CharPtrTy
= Ctx
.getPointerType(Ctx
.CharTy
);
378 SVal FirstStart
= svalBuilder
.evalCast(*firstLoc
, CharPtrTy
, First
->getType());
379 Loc
*FirstStartLoc
= dyn_cast
<Loc
>(&FirstStart
);
383 // Compute the end of the first buffer. Bail out if THAT fails.
384 SVal FirstEnd
= svalBuilder
.evalBinOpLN(state
, BO_Add
,
385 *FirstStartLoc
, *Length
, CharPtrTy
);
386 Loc
*FirstEndLoc
= dyn_cast
<Loc
>(&FirstEnd
);
390 // Is the end of the first buffer past the start of the second buffer?
391 SVal Overlap
= svalBuilder
.evalBinOpLL(state
, BO_GT
,
392 *FirstEndLoc
, *secondLoc
, cmpTy
);
393 DefinedOrUnknownSVal
*OverlapTest
= dyn_cast
<DefinedOrUnknownSVal
>(&Overlap
);
397 llvm::tie(stateTrue
, stateFalse
) = state
->assume(*OverlapTest
);
399 if (stateTrue
&& !stateFalse
) {
401 emitOverlapBug(C
, stateTrue
, First
, Second
);
405 // assume the two expressions don't overlap.
410 void CStringChecker::emitOverlapBug(CheckerContext
&C
, const GRState
*state
,
411 const Stmt
*First
, const Stmt
*Second
) {
412 ExplodedNode
*N
= C
.generateSink(state
);
417 BT_Overlap
= new BugType("Unix API", "Improper arguments");
419 // Generate a report for this bug.
420 RangedBugReport
*report
=
421 new RangedBugReport(*BT_Overlap
,
422 "Arguments must not be overlapping buffers", N
);
423 report
->addRange(First
->getSourceRange());
424 report
->addRange(Second
->getSourceRange());
426 C
.EmitReport(report
);
429 const GRState
*CStringChecker::setCStringLength(const GRState
*state
,
432 assert(!strLength
.isUndef() && "Attempt to set an undefined string length");
433 if (strLength
.isUnknown())
436 MR
= MR
->StripCasts();
438 switch (MR
->getKind()) {
439 case MemRegion::StringRegionKind
:
440 // FIXME: This can happen if we strcpy() into a string region. This is
441 // undefined [C99 6.4.5p6], but we should still warn about it.
444 case MemRegion::SymbolicRegionKind
:
445 case MemRegion::AllocaRegionKind
:
446 case MemRegion::VarRegionKind
:
447 case MemRegion::FieldRegionKind
:
448 case MemRegion::ObjCIvarRegionKind
:
449 return state
->set
<CStringLength
>(MR
, strLength
);
451 case MemRegion::ElementRegionKind
:
452 // FIXME: Handle element regions by upper-bounding the parent region's
457 // Other regions (mostly non-data) can't have a reliable C string length.
458 // For now, just ignore the change.
459 // FIXME: These are rare but not impossible. We should output some kind of
460 // warning for things like strcpy((char[]){'a', 0}, "b");
465 SVal
CStringChecker::getCStringLengthForRegion(CheckerContext
&C
,
466 const GRState
*&state
,
468 const MemRegion
*MR
) {
469 // If there's a recorded length, go ahead and return it.
470 const SVal
*Recorded
= state
->get
<CStringLength
>(MR
);
474 // Otherwise, get a new symbol and update the state.
475 unsigned Count
= C
.getNodeBuilder().getCurrentBlockCount();
476 SValBuilder
&svalBuilder
= C
.getSValBuilder();
477 QualType sizeTy
= svalBuilder
.getContext().getSizeType();
478 SVal strLength
= svalBuilder
.getMetadataSymbolVal(getTag(), MR
, Ex
, sizeTy
, Count
);
479 state
= state
->set
<CStringLength
>(MR
, strLength
);
483 SVal
CStringChecker::getCStringLength(CheckerContext
&C
, const GRState
*&state
,
484 const Expr
*Ex
, SVal Buf
) {
485 const MemRegion
*MR
= Buf
.getAsRegion();
487 // If we can't get a region, see if it's something we /know/ isn't a
488 // C string. In the context of locations, the only time we can issue such
489 // a warning is for labels.
490 if (loc::GotoLabel
*Label
= dyn_cast
<loc::GotoLabel
>(&Buf
)) {
491 if (ExplodedNode
*N
= C
.generateNode(state
)) {
493 BT_NotCString
= new BuiltinBug("API",
494 "Argument is not a null-terminated string.");
496 llvm::SmallString
<120> buf
;
497 llvm::raw_svector_ostream
os(buf
);
498 os
<< "Argument to byte string function is the address of the label '"
499 << Label
->getLabel()->getID()->getName()
500 << "', which is not a null-terminated string";
502 // Generate a report for this bug.
503 EnhancedBugReport
*report
= new EnhancedBugReport(*BT_NotCString
,
506 report
->addRange(Ex
->getSourceRange());
507 C
.EmitReport(report
);
510 return UndefinedVal();
513 // If it's not a region and not a label, give up.
517 // If we have a region, strip casts from it and see if we can figure out
518 // its length. For anything we can't figure out, just return UnknownVal.
519 MR
= MR
->StripCasts();
521 switch (MR
->getKind()) {
522 case MemRegion::StringRegionKind
: {
523 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
524 // so we can assume that the byte length is the correct C string length.
525 SValBuilder
&svalBuilder
= C
.getSValBuilder();
526 QualType sizeTy
= svalBuilder
.getContext().getSizeType();
527 const StringLiteral
*strLit
= cast
<StringRegion
>(MR
)->getStringLiteral();
528 return svalBuilder
.makeIntVal(strLit
->getByteLength(), sizeTy
);
530 case MemRegion::SymbolicRegionKind
:
531 case MemRegion::AllocaRegionKind
:
532 case MemRegion::VarRegionKind
:
533 case MemRegion::FieldRegionKind
:
534 case MemRegion::ObjCIvarRegionKind
:
535 return getCStringLengthForRegion(C
, state
, Ex
, MR
);
536 case MemRegion::CompoundLiteralRegionKind
:
537 // FIXME: Can we track this? Is it necessary?
539 case MemRegion::ElementRegionKind
:
540 // FIXME: How can we handle this? It's not good enough to subtract the
541 // offset from the base string length; consider "123\x00567" and &a[5].
544 // Other regions (mostly non-data) can't have a reliable C string length.
545 // In this case, an error is emitted and UndefinedVal is returned.
546 // The caller should always be prepared to handle this case.
547 if (ExplodedNode
*N
= C
.generateNode(state
)) {
549 BT_NotCString
= new BuiltinBug("API",
550 "Argument is not a null-terminated string.");
552 llvm::SmallString
<120> buf
;
553 llvm::raw_svector_ostream
os(buf
);
555 os
<< "Argument to byte string function is ";
557 if (SummarizeRegion(os
, C
.getASTContext(), MR
))
558 os
<< ", which is not a null-terminated string";
560 os
<< "not a null-terminated string";
562 // Generate a report for this bug.
563 EnhancedBugReport
*report
= new EnhancedBugReport(*BT_NotCString
,
566 report
->addRange(Ex
->getSourceRange());
567 C
.EmitReport(report
);
570 return UndefinedVal();
574 const GRState
*CStringChecker::InvalidateBuffer(CheckerContext
&C
,
575 const GRState
*state
,
576 const Expr
*E
, SVal V
) {
577 Loc
*L
= dyn_cast
<Loc
>(&V
);
581 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
582 // some assumptions about the value that CFRefCount can't. Even so, it should
583 // probably be refactored.
584 if (loc::MemRegionVal
* MR
= dyn_cast
<loc::MemRegionVal
>(L
)) {
585 const MemRegion
*R
= MR
->getRegion()->StripCasts();
587 // Are we dealing with an ElementRegion? If so, we should be invalidating
589 if (const ElementRegion
*ER
= dyn_cast
<ElementRegion
>(R
)) {
590 R
= ER
->getSuperRegion();
591 // FIXME: What about layers of ElementRegions?
594 // Invalidate this region.
595 unsigned Count
= C
.getNodeBuilder().getCurrentBlockCount();
596 return state
->InvalidateRegion(R
, E
, Count
, NULL
);
599 // If we have a non-region value by chance, just remove the binding.
600 // FIXME: is this necessary or correct? This handles the non-Region
601 // cases. Is it ever valid to store to these?
602 return state
->unbindLoc(*L
);
605 bool CStringChecker::SummarizeRegion(llvm::raw_ostream
& os
, ASTContext
& Ctx
,
606 const MemRegion
*MR
) {
607 const TypedRegion
*TR
= dyn_cast
<TypedRegion
>(MR
);
611 switch (TR
->getKind()) {
612 case MemRegion::FunctionTextRegionKind
: {
613 const FunctionDecl
*FD
= cast
<FunctionTextRegion
>(TR
)->getDecl();
615 os
<< "the address of the function '" << FD
<< "'";
617 os
<< "the address of a function";
620 case MemRegion::BlockTextRegionKind
:
623 case MemRegion::BlockDataRegionKind
:
626 case MemRegion::CXXThisRegionKind
:
627 case MemRegion::CXXTempObjectRegionKind
:
628 os
<< "a C++ temp object of type " << TR
->getValueType().getAsString();
630 case MemRegion::VarRegionKind
:
631 os
<< "a variable of type" << TR
->getValueType().getAsString();
633 case MemRegion::FieldRegionKind
:
634 os
<< "a field of type " << TR
->getValueType().getAsString();
636 case MemRegion::ObjCIvarRegionKind
:
637 os
<< "an instance variable of type " << TR
->getValueType().getAsString();
644 //===----------------------------------------------------------------------===//
645 // evaluation of individual function calls.
646 //===----------------------------------------------------------------------===//
648 void CStringChecker::evalCopyCommon(CheckerContext
&C
, const GRState
*state
,
649 const Expr
*Size
, const Expr
*Dest
,
650 const Expr
*Source
, bool Restricted
) {
651 // See if the size argument is zero.
652 SVal sizeVal
= state
->getSVal(Size
);
653 QualType sizeTy
= Size
->getType();
655 const GRState
*stateZeroSize
, *stateNonZeroSize
;
656 llvm::tie(stateZeroSize
, stateNonZeroSize
) = assumeZero(C
, state
, sizeVal
, sizeTy
);
658 // If the size is zero, there won't be any actual memory access.
660 C
.addTransition(stateZeroSize
);
662 // If the size can be nonzero, we have to check the other arguments.
663 if (stateNonZeroSize
) {
664 state
= stateNonZeroSize
;
665 state
= CheckBufferAccess(C
, state
, Size
, Dest
, Source
,
666 /* FirstIsDst = */ true);
668 state
= CheckOverlap(C
, state
, Size
, Dest
, Source
);
671 // Invalidate the destination.
672 // FIXME: Even if we can't perfectly model the copy, we should see if we
673 // can use LazyCompoundVals to copy the source values into the destination.
674 // This would probably remove any existing bindings past the end of the
675 // copied region, but that's still an improvement over blank invalidation.
676 state
= InvalidateBuffer(C
, state
, Dest
, state
->getSVal(Dest
));
677 C
.addTransition(state
);
683 void CStringChecker::evalMemcpy(CheckerContext
&C
, const CallExpr
*CE
) {
684 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
685 // The return value is the address of the destination buffer.
686 const Expr
*Dest
= CE
->getArg(0);
687 const GRState
*state
= C
.getState();
688 state
= state
->BindExpr(CE
, state
->getSVal(Dest
));
689 evalCopyCommon(C
, state
, CE
->getArg(2), Dest
, CE
->getArg(1), true);
692 void CStringChecker::evalMemmove(CheckerContext
&C
, const CallExpr
*CE
) {
693 // void *memmove(void *dst, const void *src, size_t n);
694 // The return value is the address of the destination buffer.
695 const Expr
*Dest
= CE
->getArg(0);
696 const GRState
*state
= C
.getState();
697 state
= state
->BindExpr(CE
, state
->getSVal(Dest
));
698 evalCopyCommon(C
, state
, CE
->getArg(2), Dest
, CE
->getArg(1));
701 void CStringChecker::evalBcopy(CheckerContext
&C
, const CallExpr
*CE
) {
702 // void bcopy(const void *src, void *dst, size_t n);
703 evalCopyCommon(C
, C
.getState(), CE
->getArg(2), CE
->getArg(1), CE
->getArg(0));
706 void CStringChecker::evalMemcmp(CheckerContext
&C
, const CallExpr
*CE
) {
707 // int memcmp(const void *s1, const void *s2, size_t n);
708 const Expr
*Left
= CE
->getArg(0);
709 const Expr
*Right
= CE
->getArg(1);
710 const Expr
*Size
= CE
->getArg(2);
712 const GRState
*state
= C
.getState();
713 SValBuilder
&svalBuilder
= C
.getSValBuilder();
715 // See if the size argument is zero.
716 SVal sizeVal
= state
->getSVal(Size
);
717 QualType sizeTy
= Size
->getType();
719 const GRState
*stateZeroSize
, *stateNonZeroSize
;
720 llvm::tie(stateZeroSize
, stateNonZeroSize
) =
721 assumeZero(C
, state
, sizeVal
, sizeTy
);
723 // If the size can be zero, the result will be 0 in that case, and we don't
724 // have to check either of the buffers.
726 state
= stateZeroSize
;
727 state
= state
->BindExpr(CE
, svalBuilder
.makeZeroVal(CE
->getType()));
728 C
.addTransition(state
);
731 // If the size can be nonzero, we have to check the other arguments.
732 if (stateNonZeroSize
) {
733 state
= stateNonZeroSize
;
734 // If we know the two buffers are the same, we know the result is 0.
735 // First, get the two buffers' addresses. Another checker will have already
736 // made sure they're not undefined.
737 DefinedOrUnknownSVal LV
= cast
<DefinedOrUnknownSVal
>(state
->getSVal(Left
));
738 DefinedOrUnknownSVal RV
= cast
<DefinedOrUnknownSVal
>(state
->getSVal(Right
));
740 // See if they are the same.
741 DefinedOrUnknownSVal SameBuf
= svalBuilder
.evalEQ(state
, LV
, RV
);
742 const GRState
*StSameBuf
, *StNotSameBuf
;
743 llvm::tie(StSameBuf
, StNotSameBuf
) = state
->assume(SameBuf
);
745 // If the two arguments might be the same buffer, we know the result is zero,
746 // and we only need to check one size.
749 state
= CheckBufferAccess(C
, state
, Size
, Left
);
751 state
= StSameBuf
->BindExpr(CE
, svalBuilder
.makeZeroVal(CE
->getType()));
752 C
.addTransition(state
);
756 // If the two arguments might be different buffers, we have to check the
757 // size of both of them.
759 state
= StNotSameBuf
;
760 state
= CheckBufferAccess(C
, state
, Size
, Left
, Right
);
762 // The return value is the comparison result, which we don't know.
763 unsigned Count
= C
.getNodeBuilder().getCurrentBlockCount();
764 SVal CmpV
= svalBuilder
.getConjuredSymbolVal(NULL
, CE
, Count
);
765 state
= state
->BindExpr(CE
, CmpV
);
766 C
.addTransition(state
);
772 void CStringChecker::evalstrLength(CheckerContext
&C
, const CallExpr
*CE
) {
773 // size_t strlen(const char *s);
774 const GRState
*state
= C
.getState();
775 const Expr
*Arg
= CE
->getArg(0);
776 SVal ArgVal
= state
->getSVal(Arg
);
778 // Check that the argument is non-null.
779 state
= checkNonNull(C
, state
, Arg
, ArgVal
);
782 SVal strLength
= getCStringLength(C
, state
, Arg
, ArgVal
);
784 // If the argument isn't a valid C string, there's no valid state to
786 if (strLength
.isUndef())
789 // If getCStringLength couldn't figure out the length, conjure a return
790 // value, so it can be used in constraints, at least.
791 if (strLength
.isUnknown()) {
792 unsigned Count
= C
.getNodeBuilder().getCurrentBlockCount();
793 strLength
= C
.getSValBuilder().getConjuredSymbolVal(NULL
, CE
, Count
);
796 // Bind the return value.
797 state
= state
->BindExpr(CE
, strLength
);
798 C
.addTransition(state
);
802 void CStringChecker::evalStrcpy(CheckerContext
&C
, const CallExpr
*CE
) {
803 // char *strcpy(char *restrict dst, const char *restrict src);
804 evalStrcpyCommon(C
, CE
, /* returnEnd = */ false);
807 void CStringChecker::evalStpcpy(CheckerContext
&C
, const CallExpr
*CE
) {
808 // char *stpcpy(char *restrict dst, const char *restrict src);
809 evalStrcpyCommon(C
, CE
, /* returnEnd = */ true);
812 void CStringChecker::evalStrcpyCommon(CheckerContext
&C
, const CallExpr
*CE
,
814 const GRState
*state
= C
.getState();
816 // Check that the destination is non-null
817 const Expr
*Dst
= CE
->getArg(0);
818 SVal DstVal
= state
->getSVal(Dst
);
820 state
= checkNonNull(C
, state
, Dst
, DstVal
);
824 // Check that the source is non-null.
825 const Expr
*srcExpr
= CE
->getArg(1);
826 SVal srcVal
= state
->getSVal(srcExpr
);
827 state
= checkNonNull(C
, state
, srcExpr
, srcVal
);
831 // Get the string length of the source.
832 SVal strLength
= getCStringLength(C
, state
, srcExpr
, srcVal
);
834 // If the source isn't a valid C string, give up.
835 if (strLength
.isUndef())
838 SVal Result
= (returnEnd
? UnknownVal() : DstVal
);
840 // If the destination is a MemRegion, try to check for a buffer overflow and
841 // record the new string length.
842 if (loc::MemRegionVal
*dstRegVal
= dyn_cast
<loc::MemRegionVal
>(&DstVal
)) {
843 // If the length is known, we can check for an overflow.
844 if (NonLoc
*knownStrLength
= dyn_cast
<NonLoc
>(&strLength
)) {
846 C
.getSValBuilder().evalBinOpLN(state
, BO_Add
, *dstRegVal
,
847 *knownStrLength
, Dst
->getType());
849 state
= CheckLocation(C
, state
, Dst
, lastElement
, /* IsDst = */ true);
853 // If this is a stpcpy-style copy, the last element is the return value.
855 Result
= lastElement
;
858 // Invalidate the destination. This must happen before we set the C string
859 // length because invalidation will clear the length.
860 // FIXME: Even if we can't perfectly model the copy, we should see if we
861 // can use LazyCompoundVals to copy the source values into the destination.
862 // This would probably remove any existing bindings past the end of the
863 // string, but that's still an improvement over blank invalidation.
864 state
= InvalidateBuffer(C
, state
, Dst
, *dstRegVal
);
866 // Set the C string length of the destination.
867 state
= setCStringLength(state
, dstRegVal
->getRegion(), strLength
);
870 // If this is a stpcpy-style copy, but we were unable to check for a buffer
871 // overflow, we still need a result. Conjure a return value.
872 if (returnEnd
&& Result
.isUnknown()) {
873 SValBuilder
&svalBuilder
= C
.getSValBuilder();
874 unsigned Count
= C
.getNodeBuilder().getCurrentBlockCount();
875 strLength
= svalBuilder
.getConjuredSymbolVal(NULL
, CE
, Count
);
878 // Set the return value.
879 state
= state
->BindExpr(CE
, Result
);
880 C
.addTransition(state
);
883 //===----------------------------------------------------------------------===//
884 // The driver method, and other Checker callbacks.
885 //===----------------------------------------------------------------------===//
887 bool CStringChecker::evalCallExpr(CheckerContext
&C
, const CallExpr
*CE
) {
888 // Get the callee. All the functions we care about are C functions
889 // with simple identifiers.
890 const GRState
*state
= C
.getState();
891 const Expr
*Callee
= CE
->getCallee();
892 const FunctionDecl
*FD
= state
->getSVal(Callee
).getAsFunctionDecl();
897 // Get the name of the callee. If it's a builtin, strip off the prefix.
898 IdentifierInfo
*II
= FD
->getIdentifier();
899 if (!II
) // if no identifier, not a simple C function
901 llvm::StringRef Name
= II
->getName();
902 if (Name
.startswith("__builtin_"))
903 Name
= Name
.substr(10);
905 FnCheck evalFunction
= llvm::StringSwitch
<FnCheck
>(Name
)
906 .Cases("memcpy", "__memcpy_chk", &CStringChecker::evalMemcpy
)
907 .Cases("memcmp", "bcmp", &CStringChecker::evalMemcmp
)
908 .Cases("memmove", "__memmove_chk", &CStringChecker::evalMemmove
)
909 .Cases("strcpy", "__strcpy_chk", &CStringChecker::evalStrcpy
)
910 .Cases("stpcpy", "__stpcpy_chk", &CStringChecker::evalStpcpy
)
911 .Case("strlen", &CStringChecker::evalstrLength
)
912 .Case("bcopy", &CStringChecker::evalBcopy
)
915 // If the callee isn't a string function, let another checker handle it.
919 // Check and evaluate the call.
920 (this->*evalFunction
)(C
, CE
);
924 void CStringChecker::PreVisitDeclStmt(CheckerContext
&C
, const DeclStmt
*DS
) {
925 // Record string length for char a[] = "abc";
926 const GRState
*state
= C
.getState();
928 for (DeclStmt::const_decl_iterator I
= DS
->decl_begin(), E
= DS
->decl_end();
930 const VarDecl
*D
= dyn_cast
<VarDecl
>(*I
);
934 // FIXME: Handle array fields of structs.
935 if (!D
->getType()->isArrayType())
938 const Expr
*Init
= D
->getInit();
941 if (!isa
<StringLiteral
>(Init
))
944 Loc VarLoc
= state
->getLValue(D
, C
.getPredecessor()->getLocationContext());
945 const MemRegion
*MR
= VarLoc
.getAsRegion();
949 SVal StrVal
= state
->getSVal(Init
);
950 assert(StrVal
.isValid() && "Initializer string is unknown or undefined");
951 DefinedOrUnknownSVal strLength
952 = cast
<DefinedOrUnknownSVal
>(getCStringLength(C
, state
, Init
, StrVal
));
954 state
= state
->set
<CStringLength
>(MR
, strLength
);
957 C
.addTransition(state
);
960 bool CStringChecker::WantsRegionChangeUpdate(const GRState
*state
) {
961 CStringLength::EntryMap Entries
= state
->get
<CStringLength
>();
962 return !Entries
.isEmpty();
965 const GRState
*CStringChecker::EvalRegionChanges(const GRState
*state
,
966 const MemRegion
* const *Begin
,
967 const MemRegion
* const *End
,
969 CStringLength::EntryMap Entries
= state
->get
<CStringLength
>();
970 if (Entries
.isEmpty())
973 llvm::SmallPtrSet
<const MemRegion
*, 8> Invalidated
;
974 llvm::SmallPtrSet
<const MemRegion
*, 32> SuperRegions
;
976 // First build sets for the changed regions and their super-regions.
977 for ( ; Begin
!= End
; ++Begin
) {
978 const MemRegion
*MR
= *Begin
;
979 Invalidated
.insert(MR
);
981 SuperRegions
.insert(MR
);
982 while (const SubRegion
*SR
= dyn_cast
<SubRegion
>(MR
)) {
983 MR
= SR
->getSuperRegion();
984 SuperRegions
.insert(MR
);
988 CStringLength::EntryMap::Factory
&F
= state
->get_context
<CStringLength
>();
990 // Then loop over the entries in the current state.
991 for (CStringLength::EntryMap::iterator I
= Entries
.begin(),
992 E
= Entries
.end(); I
!= E
; ++I
) {
993 const MemRegion
*MR
= I
.getKey();
995 // Is this entry for a super-region of a changed region?
996 if (SuperRegions
.count(MR
)) {
997 Entries
= F
.remove(Entries
, MR
);
1001 // Is this entry for a sub-region of a changed region?
1002 const MemRegion
*Super
= MR
;
1003 while (const SubRegion
*SR
= dyn_cast
<SubRegion
>(Super
)) {
1004 Super
= SR
->getSuperRegion();
1005 if (Invalidated
.count(Super
)) {
1006 Entries
= F
.remove(Entries
, MR
);
1012 return state
->set
<CStringLength
>(Entries
);
1015 void CStringChecker::MarkLiveSymbols(const GRState
*state
, SymbolReaper
&SR
) {
1016 // Mark all symbols in our string length map as valid.
1017 CStringLength::EntryMap Entries
= state
->get
<CStringLength
>();
1019 for (CStringLength::EntryMap::iterator I
= Entries
.begin(), E
= Entries
.end();
1021 SVal Len
= I
.getData();
1022 if (SymbolRef Sym
= Len
.getAsSymbol())
1027 void CStringChecker::evalDeadSymbols(CheckerContext
&C
, SymbolReaper
&SR
) {
1028 if (!SR
.hasDeadSymbols())
1031 const GRState
*state
= C
.getState();
1032 CStringLength::EntryMap Entries
= state
->get
<CStringLength
>();
1033 if (Entries
.isEmpty())
1036 CStringLength::EntryMap::Factory
&F
= state
->get_context
<CStringLength
>();
1037 for (CStringLength::EntryMap::iterator I
= Entries
.begin(), E
= Entries
.end();
1039 SVal Len
= I
.getData();
1040 if (SymbolRef Sym
= Len
.getAsSymbol()) {
1042 Entries
= F
.remove(Entries
, I
.getKey());
1046 state
= state
->set
<CStringLength
>(Entries
);
1047 C
.generateNode(state
);