1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This defines CStringChecker, which is an assortment of checks on calls
11 // to functions in <string.h>.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ClangSACheckers.h"
16 #include "InterCheckerAPI.h"
17 #include "clang/StaticAnalyzer/Core/Checker.h"
18 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
20 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
22 #include "llvm/ADT/SmallString.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 using namespace clang;
28 using namespace ento;
29 
30 namespace {
31 class CStringChecker : public Checker< eval::Call,
32                                          check::PreStmt<DeclStmt>,
33                                          check::LiveSymbols,
34                                          check::DeadSymbols,
35                                          check::RegionChanges
36                                          > {
37   mutable OwningPtr<BugType> BT_Null,
38                              BT_Bounds,
39                              BT_Overlap,
40                              BT_NotCString,
41                              BT_AdditionOverflow;
42 
43   mutable const char *CurrentFunctionDescription;
44 
45 public:
46   /// The filter is used to filter out the diagnostics which are not enabled by
47   /// the user.
48   struct CStringChecksFilter {
49     DefaultBool CheckCStringNullArg;
50     DefaultBool CheckCStringOutOfBounds;
51     DefaultBool CheckCStringBufferOverlap;
52     DefaultBool CheckCStringNotNullTerm;
53   };
54 
55   CStringChecksFilter Filter;
56 
57   static void *getTag() { static int tag; return &tag; }
58 
59   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
60   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
61   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
62   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
63   bool wantsRegionChangeUpdate(ProgramStateRef state) const;
64 
65   ProgramStateRef
66     checkRegionChanges(ProgramStateRef state,
67                        const StoreManager::InvalidatedSymbols *,
68                        ArrayRef<const MemRegion *> ExplicitRegions,
69                        ArrayRef<const MemRegion *> Regions,
70                        const CallEvent *Call) const;
71 
72   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
73                                           const CallExpr *) const;
74 
75   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
76   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
77   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
78   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
79   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
80                       ProgramStateRef state,
81                       const Expr *Size,
82                       const Expr *Source,
83                       const Expr *Dest,
84                       bool Restricted = false,
85                       bool IsMempcpy = false) const;
86 
87   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
88 
89   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
90   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
91   void evalstrLengthCommon(CheckerContext &C,
92                            const CallExpr *CE,
93                            bool IsStrnlen = false) const;
94 
95   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
96   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
97   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
98   void evalStrcpyCommon(CheckerContext &C,
99                         const CallExpr *CE,
100                         bool returnEnd,
101                         bool isBounded,
102                         bool isAppending) const;
103 
104   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
105   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
106 
107   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
108   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
109   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
110   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
111   void evalStrcmpCommon(CheckerContext &C,
112                         const CallExpr *CE,
113                         bool isBounded = false,
114                         bool ignoreCase = false) const;
115 
116   // Utility methods
117   std::pair<ProgramStateRef , ProgramStateRef >
118   static assumeZero(CheckerContext &C,
119                     ProgramStateRef state, SVal V, QualType Ty);
120 
121   static ProgramStateRef setCStringLength(ProgramStateRef state,
122                                               const MemRegion *MR,
123                                               SVal strLength);
124   static SVal getCStringLengthForRegion(CheckerContext &C,
125                                         ProgramStateRef &state,
126                                         const Expr *Ex,
127                                         const MemRegion *MR,
128                                         bool hypothetical);
129   SVal getCStringLength(CheckerContext &C,
130                         ProgramStateRef &state,
131                         const Expr *Ex,
132                         SVal Buf,
133                         bool hypothetical = false) const;
134 
135   const StringLiteral *getCStringLiteral(CheckerContext &C,
136                                          ProgramStateRef &state,
137                                          const Expr *expr,
138                                          SVal val) const;
139 
140   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
141                                               ProgramStateRef state,
142                                               const Expr *Ex, SVal V);
143 
144   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
145                               const MemRegion *MR);
146 
147   // Re-usable checks
148   ProgramStateRef checkNonNull(CheckerContext &C,
149                                    ProgramStateRef state,
150                                    const Expr *S,
151                                    SVal l) const;
152   ProgramStateRef CheckLocation(CheckerContext &C,
153                                     ProgramStateRef state,
154                                     const Expr *S,
155                                     SVal l,
156                                     const char *message = NULL) const;
157   ProgramStateRef CheckBufferAccess(CheckerContext &C,
158                                         ProgramStateRef state,
159                                         const Expr *Size,
160                                         const Expr *FirstBuf,
161                                         const Expr *SecondBuf,
162                                         const char *firstMessage = NULL,
163                                         const char *secondMessage = NULL,
164                                         bool WarnAboutSize = false) const;
165 
166   ProgramStateRef CheckBufferAccess(CheckerContext &C,
167                                         ProgramStateRef state,
168                                         const Expr *Size,
169                                         const Expr *Buf,
170                                         const char *message = NULL,
171                                         bool WarnAboutSize = false) const {
172     // This is a convenience override.
173     return CheckBufferAccess(C, state, Size, Buf, NULL, message, NULL,
174                              WarnAboutSize);
175   }
176   ProgramStateRef CheckOverlap(CheckerContext &C,
177                                    ProgramStateRef state,
178                                    const Expr *Size,
179                                    const Expr *First,
180                                    const Expr *Second) const;
181   void emitOverlapBug(CheckerContext &C,
182                       ProgramStateRef state,
183                       const Stmt *First,
184                       const Stmt *Second) const;
185 
186   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
187                                             ProgramStateRef state,
188                                             NonLoc left,
189                                             NonLoc right) const;
190 };
191 
192 } //end anonymous namespace
193 
194 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
195 
196 //===----------------------------------------------------------------------===//
197 // Individual checks and utility methods.
198 //===----------------------------------------------------------------------===//
199 
200 std::pair<ProgramStateRef , ProgramStateRef >
201 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
202                            QualType Ty) {
203   DefinedSVal *val = dyn_cast<DefinedSVal>(&V);
204   if (!val)
205     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
206 
207   SValBuilder &svalBuilder = C.getSValBuilder();
208   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
209   return state->assume(svalBuilder.evalEQ(state, *val, zero));
210 }
211 
212 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
213                                             ProgramStateRef state,
214                                             const Expr *S, SVal l) const {
215   // If a previous check has failed, propagate the failure.
216   if (!state)
217     return NULL;
218 
219   ProgramStateRef stateNull, stateNonNull;
220   llvm::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
221 
222   if (stateNull && !stateNonNull) {
223     if (!Filter.CheckCStringNullArg)
224       return NULL;
225 
226     ExplodedNode *N = C.generateSink(stateNull);
227     if (!N)
228       return NULL;
229 
230     if (!BT_Null)
231       BT_Null.reset(new BuiltinBug("Unix API",
232         "Null pointer argument in call to byte string function"));
233 
234     SmallString<80> buf;
235     llvm::raw_svector_ostream os(buf);
236     assert(CurrentFunctionDescription);
237     os << "Null pointer argument in call to " << CurrentFunctionDescription;
238 
239     // Generate a report for this bug.
240     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
241     BugReport *report = new BugReport(*BT, os.str(), N);
242 
243     report->addRange(S->getSourceRange());
244     bugreporter::trackNullOrUndefValue(N, S, *report);
245     C.emitReport(report);
246     return NULL;
247   }
248 
249   // From here on, assume that the value is non-null.
250   assert(stateNonNull);
251   return stateNonNull;
252 }
253 
254 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
255 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
256                                              ProgramStateRef state,
257                                              const Expr *S, SVal l,
258                                              const char *warningMsg) const {
259   // If a previous check has failed, propagate the failure.
260   if (!state)
261     return NULL;
262 
263   // Check for out of bound array element access.
264   const MemRegion *R = l.getAsRegion();
265   if (!R)
266     return state;
267 
268   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
269   if (!ER)
270     return state;
271 
272   assert(ER->getValueType() == C.getASTContext().CharTy &&
273     "CheckLocation should only be called with char* ElementRegions");
274 
275   // Get the size of the array.
276   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
277   SValBuilder &svalBuilder = C.getSValBuilder();
278   SVal Extent =
279     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
280   DefinedOrUnknownSVal Size = cast<DefinedOrUnknownSVal>(Extent);
281 
282   // Get the index of the accessed element.
283   DefinedOrUnknownSVal Idx = cast<DefinedOrUnknownSVal>(ER->getIndex());
284 
285   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
286   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
287   if (StOutBound && !StInBound) {
288     ExplodedNode *N = C.generateSink(StOutBound);
289     if (!N)
290       return NULL;
291 
292     if (!BT_Bounds) {
293       BT_Bounds.reset(new BuiltinBug("Out-of-bound array access",
294         "Byte string function accesses out-of-bound array element"));
295     }
296     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
297 
298     // Generate a report for this bug.
299     BugReport *report;
300     if (warningMsg) {
301       report = new BugReport(*BT, warningMsg, N);
302     } else {
303       assert(CurrentFunctionDescription);
304       assert(CurrentFunctionDescription[0] != '\0');
305 
306       SmallString<80> buf;
307       llvm::raw_svector_ostream os(buf);
308       os << (char)toupper(CurrentFunctionDescription[0])
309          << &CurrentFunctionDescription[1]
310          << " accesses out-of-bound array element";
311       report = new BugReport(*BT, os.str(), N);
312     }
313 
314     // FIXME: It would be nice to eventually make this diagnostic more clear,
315     // e.g., by referencing the original declaration or by saying *why* this
316     // reference is outside the range.
317 
318     report->addRange(S->getSourceRange());
319     C.emitReport(report);
320     return NULL;
321   }
322 
323   // Array bound check succeeded.  From this point forward the array bound
324   // should always succeed.
325   return StInBound;
326 }
327 
328 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
329                                                  ProgramStateRef state,
330                                                  const Expr *Size,
331                                                  const Expr *FirstBuf,
332                                                  const Expr *SecondBuf,
333                                                  const char *firstMessage,
334                                                  const char *secondMessage,
335                                                  bool WarnAboutSize) const {
336   // If a previous check has failed, propagate the failure.
337   if (!state)
338     return NULL;
339 
340   SValBuilder &svalBuilder = C.getSValBuilder();
341   ASTContext &Ctx = svalBuilder.getContext();
342   const LocationContext *LCtx = C.getLocationContext();
343 
344   QualType sizeTy = Size->getType();
345   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
346 
347   // Check that the first buffer is non-null.
348   SVal BufVal = state->getSVal(FirstBuf, LCtx);
349   state = checkNonNull(C, state, FirstBuf, BufVal);
350   if (!state)
351     return NULL;
352 
353   // If out-of-bounds checking is turned off, skip the rest.
354   if (!Filter.CheckCStringOutOfBounds)
355     return state;
356 
357   // Get the access length and make sure it is known.
358   // FIXME: This assumes the caller has already checked that the access length
359   // is positive. And that it's unsigned.
360   SVal LengthVal = state->getSVal(Size, LCtx);
361   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
362   if (!Length)
363     return state;
364 
365   // Compute the offset of the last element to be accessed: size-1.
366   NonLoc One = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
367   NonLoc LastOffset = cast<NonLoc>(svalBuilder.evalBinOpNN(state, BO_Sub,
368                                                     *Length, One, sizeTy));
369 
370   // Check that the first buffer is sufficiently long.
371   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
372   if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
373     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
374 
375     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
376                                           LastOffset, PtrTy);
377     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
378 
379     // If the buffer isn't large enough, abort.
380     if (!state)
381       return NULL;
382   }
383 
384   // If there's a second buffer, check it as well.
385   if (SecondBuf) {
386     BufVal = state->getSVal(SecondBuf, LCtx);
387     state = checkNonNull(C, state, SecondBuf, BufVal);
388     if (!state)
389       return NULL;
390 
391     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
392     if (Loc *BufLoc = dyn_cast<Loc>(&BufStart)) {
393       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
394 
395       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
396                                             LastOffset, PtrTy);
397       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
398     }
399   }
400 
401   // Large enough or not, return this state!
402   return state;
403 }
404 
405 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
406                                             ProgramStateRef state,
407                                             const Expr *Size,
408                                             const Expr *First,
409                                             const Expr *Second) const {
410   if (!Filter.CheckCStringBufferOverlap)
411     return state;
412 
413   // Do a simple check for overlap: if the two arguments are from the same
414   // buffer, see if the end of the first is greater than the start of the second
415   // or vice versa.
416 
417   // If a previous check has failed, propagate the failure.
418   if (!state)
419     return NULL;
420 
421   ProgramStateRef stateTrue, stateFalse;
422 
423   // Get the buffer values and make sure they're known locations.
424   const LocationContext *LCtx = C.getLocationContext();
425   SVal firstVal = state->getSVal(First, LCtx);
426   SVal secondVal = state->getSVal(Second, LCtx);
427 
428   Loc *firstLoc = dyn_cast<Loc>(&firstVal);
429   if (!firstLoc)
430     return state;
431 
432   Loc *secondLoc = dyn_cast<Loc>(&secondVal);
433   if (!secondLoc)
434     return state;
435 
436   // Are the two values the same?
437   SValBuilder &svalBuilder = C.getSValBuilder();
438   llvm::tie(stateTrue, stateFalse) =
439     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
440 
441   if (stateTrue && !stateFalse) {
442     // If the values are known to be equal, that's automatically an overlap.
443     emitOverlapBug(C, stateTrue, First, Second);
444     return NULL;
445   }
446 
447   // assume the two expressions are not equal.
448   assert(stateFalse);
449   state = stateFalse;
450 
451   // Which value comes first?
452   QualType cmpTy = svalBuilder.getConditionType();
453   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
454                                          *firstLoc, *secondLoc, cmpTy);
455   DefinedOrUnknownSVal *reverseTest = dyn_cast<DefinedOrUnknownSVal>(&reverse);
456   if (!reverseTest)
457     return state;
458 
459   llvm::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
460   if (stateTrue) {
461     if (stateFalse) {
462       // If we don't know which one comes first, we can't perform this test.
463       return state;
464     } else {
465       // Switch the values so that firstVal is before secondVal.
466       Loc *tmpLoc = firstLoc;
467       firstLoc = secondLoc;
468       secondLoc = tmpLoc;
469 
470       // Switch the Exprs as well, so that they still correspond.
471       const Expr *tmpExpr = First;
472       First = Second;
473       Second = tmpExpr;
474     }
475   }
476 
477   // Get the length, and make sure it too is known.
478   SVal LengthVal = state->getSVal(Size, LCtx);
479   NonLoc *Length = dyn_cast<NonLoc>(&LengthVal);
480   if (!Length)
481     return state;
482 
483   // Convert the first buffer's start address to char*.
484   // Bail out if the cast fails.
485   ASTContext &Ctx = svalBuilder.getContext();
486   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
487   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy,
488                                          First->getType());
489   Loc *FirstStartLoc = dyn_cast<Loc>(&FirstStart);
490   if (!FirstStartLoc)
491     return state;
492 
493   // Compute the end of the first buffer. Bail out if THAT fails.
494   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
495                                  *FirstStartLoc, *Length, CharPtrTy);
496   Loc *FirstEndLoc = dyn_cast<Loc>(&FirstEnd);
497   if (!FirstEndLoc)
498     return state;
499 
500   // Is the end of the first buffer past the start of the second buffer?
501   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
502                                 *FirstEndLoc, *secondLoc, cmpTy);
503   DefinedOrUnknownSVal *OverlapTest = dyn_cast<DefinedOrUnknownSVal>(&Overlap);
504   if (!OverlapTest)
505     return state;
506 
507   llvm::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
508 
509   if (stateTrue && !stateFalse) {
510     // Overlap!
511     emitOverlapBug(C, stateTrue, First, Second);
512     return NULL;
513   }
514 
515   // assume the two expressions don't overlap.
516   assert(stateFalse);
517   return stateFalse;
518 }
519 
520 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
521                                   const Stmt *First, const Stmt *Second) const {
522   ExplodedNode *N = C.generateSink(state);
523   if (!N)
524     return;
525 
526   if (!BT_Overlap)
527     BT_Overlap.reset(new BugType("Unix API", "Improper arguments"));
528 
529   // Generate a report for this bug.
530   BugReport *report =
531     new BugReport(*BT_Overlap,
532       "Arguments must not be overlapping buffers", N);
533   report->addRange(First->getSourceRange());
534   report->addRange(Second->getSourceRange());
535 
536   C.emitReport(report);
537 }
538 
539 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
540                                                      ProgramStateRef state,
541                                                      NonLoc left,
542                                                      NonLoc right) const {
543   // If out-of-bounds checking is turned off, skip the rest.
544   if (!Filter.CheckCStringOutOfBounds)
545     return state;
546 
547   // If a previous check has failed, propagate the failure.
548   if (!state)
549     return NULL;
550 
551   SValBuilder &svalBuilder = C.getSValBuilder();
552   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
553 
554   QualType sizeTy = svalBuilder.getContext().getSizeType();
555   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
556   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
557 
558   SVal maxMinusRight;
559   if (isa<nonloc::ConcreteInt>(right)) {
560     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
561                                                  sizeTy);
562   } else {
563     // Try switching the operands. (The order of these two assignments is
564     // important!)
565     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
566                                             sizeTy);
567     left = right;
568   }
569 
570   if (NonLoc *maxMinusRightNL = dyn_cast<NonLoc>(&maxMinusRight)) {
571     QualType cmpTy = svalBuilder.getConditionType();
572     // If left > max - right, we have an overflow.
573     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
574                                                 *maxMinusRightNL, cmpTy);
575 
576     ProgramStateRef stateOverflow, stateOkay;
577     llvm::tie(stateOverflow, stateOkay) =
578       state->assume(cast<DefinedOrUnknownSVal>(willOverflow));
579 
580     if (stateOverflow && !stateOkay) {
581       // We have an overflow. Emit a bug report.
582       ExplodedNode *N = C.generateSink(stateOverflow);
583       if (!N)
584         return NULL;
585 
586       if (!BT_AdditionOverflow)
587         BT_AdditionOverflow.reset(new BuiltinBug("API",
588           "Sum of expressions causes overflow"));
589 
590       // This isn't a great error message, but this should never occur in real
591       // code anyway -- you'd have to create a buffer longer than a size_t can
592       // represent, which is sort of a contradiction.
593       const char *warning =
594         "This expression will create a string whose length is too big to "
595         "be represented as a size_t";
596 
597       // Generate a report for this bug.
598       BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
599       C.emitReport(report);
600 
601       return NULL;
602     }
603 
604     // From now on, assume an overflow didn't occur.
605     assert(stateOkay);
606     state = stateOkay;
607   }
608 
609   return state;
610 }
611 
612 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
613                                                 const MemRegion *MR,
614                                                 SVal strLength) {
615   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
616 
617   MR = MR->StripCasts();
618 
619   switch (MR->getKind()) {
620   case MemRegion::StringRegionKind:
621     // FIXME: This can happen if we strcpy() into a string region. This is
622     // undefined [C99 6.4.5p6], but we should still warn about it.
623     return state;
624 
625   case MemRegion::SymbolicRegionKind:
626   case MemRegion::AllocaRegionKind:
627   case MemRegion::VarRegionKind:
628   case MemRegion::FieldRegionKind:
629   case MemRegion::ObjCIvarRegionKind:
630     // These are the types we can currently track string lengths for.
631     break;
632 
633   case MemRegion::ElementRegionKind:
634     // FIXME: Handle element regions by upper-bounding the parent region's
635     // string length.
636     return state;
637 
638   default:
639     // Other regions (mostly non-data) can't have a reliable C string length.
640     // For now, just ignore the change.
641     // FIXME: These are rare but not impossible. We should output some kind of
642     // warning for things like strcpy((char[]){'a', 0}, "b");
643     return state;
644   }
645 
646   if (strLength.isUnknown())
647     return state->remove<CStringLength>(MR);
648 
649   return state->set<CStringLength>(MR, strLength);
650 }
651 
652 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
653                                                ProgramStateRef &state,
654                                                const Expr *Ex,
655                                                const MemRegion *MR,
656                                                bool hypothetical) {
657   if (!hypothetical) {
658     // If there's a recorded length, go ahead and return it.
659     const SVal *Recorded = state->get<CStringLength>(MR);
660     if (Recorded)
661       return *Recorded;
662   }
663 
664   // Otherwise, get a new symbol and update the state.
665   SValBuilder &svalBuilder = C.getSValBuilder();
666   QualType sizeTy = svalBuilder.getContext().getSizeType();
667   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
668                                                     MR, Ex, sizeTy,
669                                                     C.blockCount());
670 
671   if (!hypothetical)
672     state = state->set<CStringLength>(MR, strLength);
673 
674   return strLength;
675 }
676 
677 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
678                                       const Expr *Ex, SVal Buf,
679                                       bool hypothetical) const {
680   const MemRegion *MR = Buf.getAsRegion();
681   if (!MR) {
682     // If we can't get a region, see if it's something we /know/ isn't a
683     // C string. In the context of locations, the only time we can issue such
684     // a warning is for labels.
685     if (loc::GotoLabel *Label = dyn_cast<loc::GotoLabel>(&Buf)) {
686       if (!Filter.CheckCStringNotNullTerm)
687         return UndefinedVal();
688 
689       if (ExplodedNode *N = C.addTransition(state)) {
690         if (!BT_NotCString)
691           BT_NotCString.reset(new BuiltinBug("Unix API",
692             "Argument is not a null-terminated string."));
693 
694         SmallString<120> buf;
695         llvm::raw_svector_ostream os(buf);
696         assert(CurrentFunctionDescription);
697         os << "Argument to " << CurrentFunctionDescription
698            << " is the address of the label '" << Label->getLabel()->getName()
699            << "', which is not a null-terminated string";
700 
701         // Generate a report for this bug.
702         BugReport *report = new BugReport(*BT_NotCString,
703                                                           os.str(), N);
704 
705         report->addRange(Ex->getSourceRange());
706         C.emitReport(report);
707       }
708       return UndefinedVal();
709 
710     }
711 
712     // If it's not a region and not a label, give up.
713     return UnknownVal();
714   }
715 
716   // If we have a region, strip casts from it and see if we can figure out
717   // its length. For anything we can't figure out, just return UnknownVal.
718   MR = MR->StripCasts();
719 
720   switch (MR->getKind()) {
721   case MemRegion::StringRegionKind: {
722     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
723     // so we can assume that the byte length is the correct C string length.
724     SValBuilder &svalBuilder = C.getSValBuilder();
725     QualType sizeTy = svalBuilder.getContext().getSizeType();
726     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
727     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
728   }
729   case MemRegion::SymbolicRegionKind:
730   case MemRegion::AllocaRegionKind:
731   case MemRegion::VarRegionKind:
732   case MemRegion::FieldRegionKind:
733   case MemRegion::ObjCIvarRegionKind:
734     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
735   case MemRegion::CompoundLiteralRegionKind:
736     // FIXME: Can we track this? Is it necessary?
737     return UnknownVal();
738   case MemRegion::ElementRegionKind:
739     // FIXME: How can we handle this? It's not good enough to subtract the
740     // offset from the base string length; consider "123\x00567" and &a[5].
741     return UnknownVal();
742   default:
743     // Other regions (mostly non-data) can't have a reliable C string length.
744     // In this case, an error is emitted and UndefinedVal is returned.
745     // The caller should always be prepared to handle this case.
746     if (!Filter.CheckCStringNotNullTerm)
747       return UndefinedVal();
748 
749     if (ExplodedNode *N = C.addTransition(state)) {
750       if (!BT_NotCString)
751         BT_NotCString.reset(new BuiltinBug("Unix API",
752           "Argument is not a null-terminated string."));
753 
754       SmallString<120> buf;
755       llvm::raw_svector_ostream os(buf);
756 
757       assert(CurrentFunctionDescription);
758       os << "Argument to " << CurrentFunctionDescription << " is ";
759 
760       if (SummarizeRegion(os, C.getASTContext(), MR))
761         os << ", which is not a null-terminated string";
762       else
763         os << "not a null-terminated string";
764 
765       // Generate a report for this bug.
766       BugReport *report = new BugReport(*BT_NotCString,
767                                                         os.str(), N);
768 
769       report->addRange(Ex->getSourceRange());
770       C.emitReport(report);
771     }
772 
773     return UndefinedVal();
774   }
775 }
776 
777 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
778   ProgramStateRef &state, const Expr *expr, SVal val) const {
779 
780   // Get the memory region pointed to by the val.
781   const MemRegion *bufRegion = val.getAsRegion();
782   if (!bufRegion)
783     return NULL;
784 
785   // Strip casts off the memory region.
786   bufRegion = bufRegion->StripCasts();
787 
788   // Cast the memory region to a string region.
789   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
790   if (!strRegion)
791     return NULL;
792 
793   // Return the actual string in the string region.
794   return strRegion->getStringLiteral();
795 }
796 
797 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
798                                                 ProgramStateRef state,
799                                                 const Expr *E, SVal V) {
800   Loc *L = dyn_cast<Loc>(&V);
801   if (!L)
802     return state;
803 
804   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
805   // some assumptions about the value that CFRefCount can't. Even so, it should
806   // probably be refactored.
807   if (loc::MemRegionVal* MR = dyn_cast<loc::MemRegionVal>(L)) {
808     const MemRegion *R = MR->getRegion()->StripCasts();
809 
810     // Are we dealing with an ElementRegion?  If so, we should be invalidating
811     // the super-region.
812     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
813       R = ER->getSuperRegion();
814       // FIXME: What about layers of ElementRegions?
815     }
816 
817     // Invalidate this region.
818     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
819     return state->invalidateRegions(R, E, C.blockCount(), LCtx);
820   }
821 
822   // If we have a non-region value by chance, just remove the binding.
823   // FIXME: is this necessary or correct? This handles the non-Region
824   //  cases.  Is it ever valid to store to these?
825   return state->killBinding(*L);
826 }
827 
828 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
829                                      const MemRegion *MR) {
830   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
831 
832   switch (MR->getKind()) {
833   case MemRegion::FunctionTextRegionKind: {
834     const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
835     if (FD)
836       os << "the address of the function '" << *FD << '\'';
837     else
838       os << "the address of a function";
839     return true;
840   }
841   case MemRegion::BlockTextRegionKind:
842     os << "block text";
843     return true;
844   case MemRegion::BlockDataRegionKind:
845     os << "a block";
846     return true;
847   case MemRegion::CXXThisRegionKind:
848   case MemRegion::CXXTempObjectRegionKind:
849     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
850     return true;
851   case MemRegion::VarRegionKind:
852     os << "a variable of type" << TVR->getValueType().getAsString();
853     return true;
854   case MemRegion::FieldRegionKind:
855     os << "a field of type " << TVR->getValueType().getAsString();
856     return true;
857   case MemRegion::ObjCIvarRegionKind:
858     os << "an instance variable of type " << TVR->getValueType().getAsString();
859     return true;
860   default:
861     return false;
862   }
863 }
864 
865 //===----------------------------------------------------------------------===//
866 // evaluation of individual function calls.
867 //===----------------------------------------------------------------------===//
868 
869 void CStringChecker::evalCopyCommon(CheckerContext &C,
870                                     const CallExpr *CE,
871                                     ProgramStateRef state,
872                                     const Expr *Size, const Expr *Dest,
873                                     const Expr *Source, bool Restricted,
874                                     bool IsMempcpy) const {
875   CurrentFunctionDescription = "memory copy function";
876 
877   // See if the size argument is zero.
878   const LocationContext *LCtx = C.getLocationContext();
879   SVal sizeVal = state->getSVal(Size, LCtx);
880   QualType sizeTy = Size->getType();
881 
882   ProgramStateRef stateZeroSize, stateNonZeroSize;
883   llvm::tie(stateZeroSize, stateNonZeroSize) =
884     assumeZero(C, state, sizeVal, sizeTy);
885 
886   // Get the value of the Dest.
887   SVal destVal = state->getSVal(Dest, LCtx);
888 
889   // If the size is zero, there won't be any actual memory access, so
890   // just bind the return value to the destination buffer and return.
891   if (stateZeroSize && !stateNonZeroSize) {
892     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
893     C.addTransition(stateZeroSize);
894     return;
895   }
896 
897   // If the size can be nonzero, we have to check the other arguments.
898   if (stateNonZeroSize) {
899     state = stateNonZeroSize;
900 
901     // Ensure the destination is not null. If it is NULL there will be a
902     // NULL pointer dereference.
903     state = checkNonNull(C, state, Dest, destVal);
904     if (!state)
905       return;
906 
907     // Get the value of the Src.
908     SVal srcVal = state->getSVal(Source, LCtx);
909 
910     // Ensure the source is not null. If it is NULL there will be a
911     // NULL pointer dereference.
912     state = checkNonNull(C, state, Source, srcVal);
913     if (!state)
914       return;
915 
916     // Ensure the accesses are valid and that the buffers do not overlap.
917     const char * const writeWarning =
918       "Memory copy function overflows destination buffer";
919     state = CheckBufferAccess(C, state, Size, Dest, Source,
920                               writeWarning, /* sourceWarning = */ NULL);
921     if (Restricted)
922       state = CheckOverlap(C, state, Size, Dest, Source);
923 
924     if (!state)
925       return;
926 
927     // If this is mempcpy, get the byte after the last byte copied and
928     // bind the expr.
929     if (IsMempcpy) {
930       loc::MemRegionVal *destRegVal = dyn_cast<loc::MemRegionVal>(&destVal);
931       assert(destRegVal && "Destination should be a known MemRegionVal here");
932 
933       // Get the length to copy.
934       NonLoc *lenValNonLoc = dyn_cast<NonLoc>(&sizeVal);
935 
936       if (lenValNonLoc) {
937         // Get the byte after the last byte copied.
938         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
939                                                           *destRegVal,
940                                                           *lenValNonLoc,
941                                                           Dest->getType());
942 
943         // The byte after the last byte copied is the return value.
944         state = state->BindExpr(CE, LCtx, lastElement);
945       } else {
946         // If we don't know how much we copied, we can at least
947         // conjure a return value for later.
948         SVal result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx,
949                                                           C.blockCount());
950         state = state->BindExpr(CE, LCtx, result);
951       }
952 
953     } else {
954       // All other copies return the destination buffer.
955       // (Well, bcopy() has a void return type, but this won't hurt.)
956       state = state->BindExpr(CE, LCtx, destVal);
957     }
958 
959     // Invalidate the destination.
960     // FIXME: Even if we can't perfectly model the copy, we should see if we
961     // can use LazyCompoundVals to copy the source values into the destination.
962     // This would probably remove any existing bindings past the end of the
963     // copied region, but that's still an improvement over blank invalidation.
964     state = InvalidateBuffer(C, state, Dest,
965                              state->getSVal(Dest, C.getLocationContext()));
966     C.addTransition(state);
967   }
968 }
969 
970 
971 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
972   if (CE->getNumArgs() < 3)
973     return;
974 
975   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
976   // The return value is the address of the destination buffer.
977   const Expr *Dest = CE->getArg(0);
978   ProgramStateRef state = C.getState();
979 
980   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
981 }
982 
983 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
984   if (CE->getNumArgs() < 3)
985     return;
986 
987   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
988   // The return value is a pointer to the byte following the last written byte.
989   const Expr *Dest = CE->getArg(0);
990   ProgramStateRef state = C.getState();
991 
992   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
993 }
994 
995 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
996   if (CE->getNumArgs() < 3)
997     return;
998 
999   // void *memmove(void *dst, const void *src, size_t n);
1000   // The return value is the address of the destination buffer.
1001   const Expr *Dest = CE->getArg(0);
1002   ProgramStateRef state = C.getState();
1003 
1004   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
1005 }
1006 
1007 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1008   if (CE->getNumArgs() < 3)
1009     return;
1010 
1011   // void bcopy(const void *src, void *dst, size_t n);
1012   evalCopyCommon(C, CE, C.getState(),
1013                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
1014 }
1015 
1016 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
1017   if (CE->getNumArgs() < 3)
1018     return;
1019 
1020   // int memcmp(const void *s1, const void *s2, size_t n);
1021   CurrentFunctionDescription = "memory comparison function";
1022 
1023   const Expr *Left = CE->getArg(0);
1024   const Expr *Right = CE->getArg(1);
1025   const Expr *Size = CE->getArg(2);
1026 
1027   ProgramStateRef state = C.getState();
1028   SValBuilder &svalBuilder = C.getSValBuilder();
1029 
1030   // See if the size argument is zero.
1031   const LocationContext *LCtx = C.getLocationContext();
1032   SVal sizeVal = state->getSVal(Size, LCtx);
1033   QualType sizeTy = Size->getType();
1034 
1035   ProgramStateRef stateZeroSize, stateNonZeroSize;
1036   llvm::tie(stateZeroSize, stateNonZeroSize) =
1037     assumeZero(C, state, sizeVal, sizeTy);
1038 
1039   // If the size can be zero, the result will be 0 in that case, and we don't
1040   // have to check either of the buffers.
1041   if (stateZeroSize) {
1042     state = stateZeroSize;
1043     state = state->BindExpr(CE, LCtx,
1044                             svalBuilder.makeZeroVal(CE->getType()));
1045     C.addTransition(state);
1046   }
1047 
1048   // If the size can be nonzero, we have to check the other arguments.
1049   if (stateNonZeroSize) {
1050     state = stateNonZeroSize;
1051     // If we know the two buffers are the same, we know the result is 0.
1052     // First, get the two buffers' addresses. Another checker will have already
1053     // made sure they're not undefined.
1054     DefinedOrUnknownSVal LV =
1055       cast<DefinedOrUnknownSVal>(state->getSVal(Left, LCtx));
1056     DefinedOrUnknownSVal RV =
1057       cast<DefinedOrUnknownSVal>(state->getSVal(Right, LCtx));
1058 
1059     // See if they are the same.
1060     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1061     ProgramStateRef StSameBuf, StNotSameBuf;
1062     llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1063 
1064     // If the two arguments might be the same buffer, we know the result is 0,
1065     // and we only need to check one size.
1066     if (StSameBuf) {
1067       state = StSameBuf;
1068       state = CheckBufferAccess(C, state, Size, Left);
1069       if (state) {
1070         state = StSameBuf->BindExpr(CE, LCtx,
1071                                     svalBuilder.makeZeroVal(CE->getType()));
1072         C.addTransition(state);
1073       }
1074     }
1075 
1076     // If the two arguments might be different buffers, we have to check the
1077     // size of both of them.
1078     if (StNotSameBuf) {
1079       state = StNotSameBuf;
1080       state = CheckBufferAccess(C, state, Size, Left, Right);
1081       if (state) {
1082         // The return value is the comparison result, which we don't know.
1083         SVal CmpV = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1084         state = state->BindExpr(CE, LCtx, CmpV);
1085         C.addTransition(state);
1086       }
1087     }
1088   }
1089 }
1090 
1091 void CStringChecker::evalstrLength(CheckerContext &C,
1092                                    const CallExpr *CE) const {
1093   if (CE->getNumArgs() < 1)
1094     return;
1095 
1096   // size_t strlen(const char *s);
1097   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1098 }
1099 
1100 void CStringChecker::evalstrnLength(CheckerContext &C,
1101                                     const CallExpr *CE) const {
1102   if (CE->getNumArgs() < 2)
1103     return;
1104 
1105   // size_t strnlen(const char *s, size_t maxlen);
1106   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1107 }
1108 
1109 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1110                                          bool IsStrnlen) const {
1111   CurrentFunctionDescription = "string length function";
1112   ProgramStateRef state = C.getState();
1113   const LocationContext *LCtx = C.getLocationContext();
1114 
1115   if (IsStrnlen) {
1116     const Expr *maxlenExpr = CE->getArg(1);
1117     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1118 
1119     ProgramStateRef stateZeroSize, stateNonZeroSize;
1120     llvm::tie(stateZeroSize, stateNonZeroSize) =
1121       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1122 
1123     // If the size can be zero, the result will be 0 in that case, and we don't
1124     // have to check the string itself.
1125     if (stateZeroSize) {
1126       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1127       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1128       C.addTransition(stateZeroSize);
1129     }
1130 
1131     // If the size is GUARANTEED to be zero, we're done!
1132     if (!stateNonZeroSize)
1133       return;
1134 
1135     // Otherwise, record the assumption that the size is nonzero.
1136     state = stateNonZeroSize;
1137   }
1138 
1139   // Check that the string argument is non-null.
1140   const Expr *Arg = CE->getArg(0);
1141   SVal ArgVal = state->getSVal(Arg, LCtx);
1142 
1143   state = checkNonNull(C, state, Arg, ArgVal);
1144 
1145   if (!state)
1146     return;
1147 
1148   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
1149 
1150   // If the argument isn't a valid C string, there's no valid state to
1151   // transition to.
1152   if (strLength.isUndef())
1153     return;
1154 
1155   DefinedOrUnknownSVal result = UnknownVal();
1156 
1157   // If the check is for strnlen() then bind the return value to no more than
1158   // the maxlen value.
1159   if (IsStrnlen) {
1160     QualType cmpTy = C.getSValBuilder().getConditionType();
1161 
1162     // It's a little unfortunate to be getting this again,
1163     // but it's not that expensive...
1164     const Expr *maxlenExpr = CE->getArg(1);
1165     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1166 
1167     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1168     NonLoc *maxlenValNL = dyn_cast<NonLoc>(&maxlenVal);
1169 
1170     if (strLengthNL && maxlenValNL) {
1171       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1172 
1173       // Check if the strLength is greater than the maxlen.
1174       llvm::tie(stateStringTooLong, stateStringNotTooLong) =
1175         state->assume(cast<DefinedOrUnknownSVal>
1176                       (C.getSValBuilder().evalBinOpNN(state, BO_GT,
1177                                                       *strLengthNL,
1178                                                       *maxlenValNL,
1179                                                       cmpTy)));
1180 
1181       if (stateStringTooLong && !stateStringNotTooLong) {
1182         // If the string is longer than maxlen, return maxlen.
1183         result = *maxlenValNL;
1184       } else if (stateStringNotTooLong && !stateStringTooLong) {
1185         // If the string is shorter than maxlen, return its length.
1186         result = *strLengthNL;
1187       }
1188     }
1189 
1190     if (result.isUnknown()) {
1191       // If we don't have enough information for a comparison, there's
1192       // no guarantee the full string length will actually be returned.
1193       // All we know is the return value is the min of the string length
1194       // and the limit. This is better than nothing.
1195       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1196       NonLoc *resultNL = cast<NonLoc>(&result);
1197 
1198       if (strLengthNL) {
1199         state = state->assume(cast<DefinedOrUnknownSVal>
1200                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1201                                                               *resultNL,
1202                                                               *strLengthNL,
1203                                                               cmpTy)), true);
1204       }
1205 
1206       if (maxlenValNL) {
1207         state = state->assume(cast<DefinedOrUnknownSVal>
1208                               (C.getSValBuilder().evalBinOpNN(state, BO_LE,
1209                                                               *resultNL,
1210                                                               *maxlenValNL,
1211                                                               cmpTy)), true);
1212       }
1213     }
1214 
1215   } else {
1216     // This is a plain strlen(), not strnlen().
1217     result = cast<DefinedOrUnknownSVal>(strLength);
1218 
1219     // If we don't know the length of the string, conjure a return
1220     // value, so it can be used in constraints, at least.
1221     if (result.isUnknown()) {
1222       result = C.getSValBuilder().conjureSymbolVal(0, CE, LCtx, C.blockCount());
1223     }
1224   }
1225 
1226   // Bind the return value.
1227   assert(!result.isUnknown() && "Should have conjured a value by now");
1228   state = state->BindExpr(CE, LCtx, result);
1229   C.addTransition(state);
1230 }
1231 
1232 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1233   if (CE->getNumArgs() < 2)
1234     return;
1235 
1236   // char *strcpy(char *restrict dst, const char *restrict src);
1237   evalStrcpyCommon(C, CE,
1238                    /* returnEnd = */ false,
1239                    /* isBounded = */ false,
1240                    /* isAppending = */ false);
1241 }
1242 
1243 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1244   if (CE->getNumArgs() < 3)
1245     return;
1246 
1247   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1248   evalStrcpyCommon(C, CE,
1249                    /* returnEnd = */ false,
1250                    /* isBounded = */ true,
1251                    /* isAppending = */ false);
1252 }
1253 
1254 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1255   if (CE->getNumArgs() < 2)
1256     return;
1257 
1258   // char *stpcpy(char *restrict dst, const char *restrict src);
1259   evalStrcpyCommon(C, CE,
1260                    /* returnEnd = */ true,
1261                    /* isBounded = */ false,
1262                    /* isAppending = */ false);
1263 }
1264 
1265 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1266   if (CE->getNumArgs() < 2)
1267     return;
1268 
1269   //char *strcat(char *restrict s1, const char *restrict s2);
1270   evalStrcpyCommon(C, CE,
1271                    /* returnEnd = */ false,
1272                    /* isBounded = */ false,
1273                    /* isAppending = */ true);
1274 }
1275 
1276 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1277   if (CE->getNumArgs() < 3)
1278     return;
1279 
1280   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1281   evalStrcpyCommon(C, CE,
1282                    /* returnEnd = */ false,
1283                    /* isBounded = */ true,
1284                    /* isAppending = */ true);
1285 }
1286 
1287 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1288                                       bool returnEnd, bool isBounded,
1289                                       bool isAppending) const {
1290   CurrentFunctionDescription = "string copy function";
1291   ProgramStateRef state = C.getState();
1292   const LocationContext *LCtx = C.getLocationContext();
1293 
1294   // Check that the destination is non-null.
1295   const Expr *Dst = CE->getArg(0);
1296   SVal DstVal = state->getSVal(Dst, LCtx);
1297 
1298   state = checkNonNull(C, state, Dst, DstVal);
1299   if (!state)
1300     return;
1301 
1302   // Check that the source is non-null.
1303   const Expr *srcExpr = CE->getArg(1);
1304   SVal srcVal = state->getSVal(srcExpr, LCtx);
1305   state = checkNonNull(C, state, srcExpr, srcVal);
1306   if (!state)
1307     return;
1308 
1309   // Get the string length of the source.
1310   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
1311 
1312   // If the source isn't a valid C string, give up.
1313   if (strLength.isUndef())
1314     return;
1315 
1316   SValBuilder &svalBuilder = C.getSValBuilder();
1317   QualType cmpTy = svalBuilder.getConditionType();
1318   QualType sizeTy = svalBuilder.getContext().getSizeType();
1319 
1320   // These two values allow checking two kinds of errors:
1321   // - actual overflows caused by a source that doesn't fit in the destination
1322   // - potential overflows caused by a bound that could exceed the destination
1323   SVal amountCopied = UnknownVal();
1324   SVal maxLastElementIndex = UnknownVal();
1325   const char *boundWarning = NULL;
1326 
1327   // If the function is strncpy, strncat, etc... it is bounded.
1328   if (isBounded) {
1329     // Get the max number of characters to copy.
1330     const Expr *lenExpr = CE->getArg(2);
1331     SVal lenVal = state->getSVal(lenExpr, LCtx);
1332 
1333     // Protect against misdeclared strncpy().
1334     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
1335 
1336     NonLoc *strLengthNL = dyn_cast<NonLoc>(&strLength);
1337     NonLoc *lenValNL = dyn_cast<NonLoc>(&lenVal);
1338 
1339     // If we know both values, we might be able to figure out how much
1340     // we're copying.
1341     if (strLengthNL && lenValNL) {
1342       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1343 
1344       // Check if the max number to copy is less than the length of the src.
1345       // If the bound is equal to the source length, strncpy won't null-
1346       // terminate the result!
1347       llvm::tie(stateSourceTooLong, stateSourceNotTooLong) =
1348         state->assume(cast<DefinedOrUnknownSVal>
1349                       (svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL,
1350                                                *lenValNL, cmpTy)));
1351 
1352       if (stateSourceTooLong && !stateSourceNotTooLong) {
1353         // Max number to copy is less than the length of the src, so the actual
1354         // strLength copied is the max number arg.
1355         state = stateSourceTooLong;
1356         amountCopied = lenVal;
1357 
1358       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1359         // The source buffer entirely fits in the bound.
1360         state = stateSourceNotTooLong;
1361         amountCopied = strLength;
1362       }
1363     }
1364 
1365     // We still want to know if the bound is known to be too large.
1366     if (lenValNL) {
1367       if (isAppending) {
1368         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1369 
1370         // Get the string length of the destination. If the destination is
1371         // memory that can't have a string length, we shouldn't be copying
1372         // into it anyway.
1373         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1374         if (dstStrLength.isUndef())
1375           return;
1376 
1377         if (NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength)) {
1378           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
1379                                                         *lenValNL,
1380                                                         *dstStrLengthNL,
1381                                                         sizeTy);
1382           boundWarning = "Size argument is greater than the free space in the "
1383                          "destination buffer";
1384         }
1385 
1386       } else {
1387         // For strncpy, this is just checking that lenVal <= sizeof(dst)
1388         // (Yes, strncpy and strncat differ in how they treat termination.
1389         // strncat ALWAYS terminates, but strncpy doesn't.)
1390 
1391         // We need a special case for when the copy size is zero, in which
1392         // case strncpy will do no work at all. Our bounds check uses n-1
1393         // as the last element accessed, so n == 0 is problematic.
1394         ProgramStateRef StateZeroSize, StateNonZeroSize;
1395         llvm::tie(StateZeroSize, StateNonZeroSize) =
1396           assumeZero(C, state, *lenValNL, sizeTy);
1397 
1398         // If the size is known to be zero, we're done.
1399         if (StateZeroSize && !StateNonZeroSize) {
1400           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1401           C.addTransition(StateZeroSize);
1402           return;
1403         }
1404 
1405         // Otherwise, go ahead and figure out the last element we'll touch.
1406         // We don't record the non-zero assumption here because we can't
1407         // be sure. We won't warn on a possible zero.
1408         NonLoc one = cast<NonLoc>(svalBuilder.makeIntVal(1, sizeTy));
1409         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1410                                                       one, sizeTy);
1411         boundWarning = "Size argument is greater than the length of the "
1412                        "destination buffer";
1413       }
1414     }
1415 
1416     // If we couldn't pin down the copy length, at least bound it.
1417     // FIXME: We should actually run this code path for append as well, but
1418     // right now it creates problems with constraints (since we can end up
1419     // trying to pass constraints from symbol to symbol).
1420     if (amountCopied.isUnknown() && !isAppending) {
1421       // Try to get a "hypothetical" string length symbol, which we can later
1422       // set as a real value if that turns out to be the case.
1423       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
1424       assert(!amountCopied.isUndef());
1425 
1426       if (NonLoc *amountCopiedNL = dyn_cast<NonLoc>(&amountCopied)) {
1427         if (lenValNL) {
1428           // amountCopied <= lenVal
1429           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
1430                                                              *amountCopiedNL,
1431                                                              *lenValNL,
1432                                                              cmpTy);
1433           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanBound),
1434                                 true);
1435           if (!state)
1436             return;
1437         }
1438 
1439         if (strLengthNL) {
1440           // amountCopied <= strlen(source)
1441           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
1442                                                            *amountCopiedNL,
1443                                                            *strLengthNL,
1444                                                            cmpTy);
1445           state = state->assume(cast<DefinedOrUnknownSVal>(copiedLessThanSrc),
1446                                 true);
1447           if (!state)
1448             return;
1449         }
1450       }
1451     }
1452 
1453   } else {
1454     // The function isn't bounded. The amount copied should match the length
1455     // of the source buffer.
1456     amountCopied = strLength;
1457   }
1458 
1459   assert(state);
1460 
1461   // This represents the number of characters copied into the destination
1462   // buffer. (It may not actually be the strlen if the destination buffer
1463   // is not terminated.)
1464   SVal finalStrLength = UnknownVal();
1465 
1466   // If this is an appending function (strcat, strncat...) then set the
1467   // string length to strlen(src) + strlen(dst) since the buffer will
1468   // ultimately contain both.
1469   if (isAppending) {
1470     // Get the string length of the destination. If the destination is memory
1471     // that can't have a string length, we shouldn't be copying into it anyway.
1472     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
1473     if (dstStrLength.isUndef())
1474       return;
1475 
1476     NonLoc *srcStrLengthNL = dyn_cast<NonLoc>(&amountCopied);
1477     NonLoc *dstStrLengthNL = dyn_cast<NonLoc>(&dstStrLength);
1478 
1479     // If we know both string lengths, we might know the final string length.
1480     if (srcStrLengthNL && dstStrLengthNL) {
1481       // Make sure the two lengths together don't overflow a size_t.
1482       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
1483       if (!state)
1484         return;
1485 
1486       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL,
1487                                                *dstStrLengthNL, sizeTy);
1488     }
1489 
1490     // If we couldn't get a single value for the final string length,
1491     // we can at least bound it by the individual lengths.
1492     if (finalStrLength.isUnknown()) {
1493       // Try to get a "hypothetical" string length symbol, which we can later
1494       // set as a real value if that turns out to be the case.
1495       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1496       assert(!finalStrLength.isUndef());
1497 
1498       if (NonLoc *finalStrLengthNL = dyn_cast<NonLoc>(&finalStrLength)) {
1499         if (srcStrLengthNL) {
1500           // finalStrLength >= srcStrLength
1501           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1502                                                         *finalStrLengthNL,
1503                                                         *srcStrLengthNL,
1504                                                         cmpTy);
1505           state = state->assume(cast<DefinedOrUnknownSVal>(sourceInResult),
1506                                 true);
1507           if (!state)
1508             return;
1509         }
1510 
1511         if (dstStrLengthNL) {
1512           // finalStrLength >= dstStrLength
1513           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1514                                                       *finalStrLengthNL,
1515                                                       *dstStrLengthNL,
1516                                                       cmpTy);
1517           state = state->assume(cast<DefinedOrUnknownSVal>(destInResult),
1518                                 true);
1519           if (!state)
1520             return;
1521         }
1522       }
1523     }
1524 
1525   } else {
1526     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1527     // the final string length will match the input string length.
1528     finalStrLength = amountCopied;
1529   }
1530 
1531   // The final result of the function will either be a pointer past the last
1532   // copied element, or a pointer to the start of the destination buffer.
1533   SVal Result = (returnEnd ? UnknownVal() : DstVal);
1534 
1535   assert(state);
1536 
1537   // If the destination is a MemRegion, try to check for a buffer overflow and
1538   // record the new string length.
1539   if (loc::MemRegionVal *dstRegVal = dyn_cast<loc::MemRegionVal>(&DstVal)) {
1540     QualType ptrTy = Dst->getType();
1541 
1542     // If we have an exact value on a bounded copy, use that to check for
1543     // overflows, rather than our estimate about how much is actually copied.
1544     if (boundWarning) {
1545       if (NonLoc *maxLastNL = dyn_cast<NonLoc>(&maxLastElementIndex)) {
1546         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1547                                                       *maxLastNL, ptrTy);
1548         state = CheckLocation(C, state, CE->getArg(2), maxLastElement,
1549                               boundWarning);
1550         if (!state)
1551           return;
1552       }
1553     }
1554 
1555     // Then, if the final length is known...
1556     if (NonLoc *knownStrLength = dyn_cast<NonLoc>(&finalStrLength)) {
1557       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
1558                                                  *knownStrLength, ptrTy);
1559 
1560       // ...and we haven't checked the bound, we'll check the actual copy.
1561       if (!boundWarning) {
1562         const char * const warningMsg =
1563           "String copy function overflows destination buffer";
1564         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
1565         if (!state)
1566           return;
1567       }
1568 
1569       // If this is a stpcpy-style copy, the last element is the return value.
1570       if (returnEnd)
1571         Result = lastElement;
1572     }
1573 
1574     // Invalidate the destination. This must happen before we set the C string
1575     // length because invalidation will clear the length.
1576     // FIXME: Even if we can't perfectly model the copy, we should see if we
1577     // can use LazyCompoundVals to copy the source values into the destination.
1578     // This would probably remove any existing bindings past the end of the
1579     // string, but that's still an improvement over blank invalidation.
1580     state = InvalidateBuffer(C, state, Dst, *dstRegVal);
1581 
1582     // Set the C string length of the destination, if we know it.
1583     if (isBounded && !isAppending) {
1584       // strncpy is annoying in that it doesn't guarantee to null-terminate
1585       // the result string. If the original string didn't fit entirely inside
1586       // the bound (including the null-terminator), we don't know how long the
1587       // result is.
1588       if (amountCopied != strLength)
1589         finalStrLength = UnknownVal();
1590     }
1591     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
1592   }
1593 
1594   assert(state);
1595 
1596   // If this is a stpcpy-style copy, but we were unable to check for a buffer
1597   // overflow, we still need a result. Conjure a return value.
1598   if (returnEnd && Result.isUnknown()) {
1599     Result = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1600   }
1601 
1602   // Set the return value.
1603   state = state->BindExpr(CE, LCtx, Result);
1604   C.addTransition(state);
1605 }
1606 
1607 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
1608   if (CE->getNumArgs() < 2)
1609     return;
1610 
1611   //int strcmp(const char *s1, const char *s2);
1612   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
1613 }
1614 
1615 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
1616   if (CE->getNumArgs() < 3)
1617     return;
1618 
1619   //int strncmp(const char *s1, const char *s2, size_t n);
1620   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
1621 }
1622 
1623 void CStringChecker::evalStrcasecmp(CheckerContext &C,
1624                                     const CallExpr *CE) const {
1625   if (CE->getNumArgs() < 2)
1626     return;
1627 
1628   //int strcasecmp(const char *s1, const char *s2);
1629   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
1630 }
1631 
1632 void CStringChecker::evalStrncasecmp(CheckerContext &C,
1633                                      const CallExpr *CE) const {
1634   if (CE->getNumArgs() < 3)
1635     return;
1636 
1637   //int strncasecmp(const char *s1, const char *s2, size_t n);
1638   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
1639 }
1640 
1641 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
1642                                       bool isBounded, bool ignoreCase) const {
1643   CurrentFunctionDescription = "string comparison function";
1644   ProgramStateRef state = C.getState();
1645   const LocationContext *LCtx = C.getLocationContext();
1646 
1647   // Check that the first string is non-null
1648   const Expr *s1 = CE->getArg(0);
1649   SVal s1Val = state->getSVal(s1, LCtx);
1650   state = checkNonNull(C, state, s1, s1Val);
1651   if (!state)
1652     return;
1653 
1654   // Check that the second string is non-null.
1655   const Expr *s2 = CE->getArg(1);
1656   SVal s2Val = state->getSVal(s2, LCtx);
1657   state = checkNonNull(C, state, s2, s2Val);
1658   if (!state)
1659     return;
1660 
1661   // Get the string length of the first string or give up.
1662   SVal s1Length = getCStringLength(C, state, s1, s1Val);
1663   if (s1Length.isUndef())
1664     return;
1665 
1666   // Get the string length of the second string or give up.
1667   SVal s2Length = getCStringLength(C, state, s2, s2Val);
1668   if (s2Length.isUndef())
1669     return;
1670 
1671   // If we know the two buffers are the same, we know the result is 0.
1672   // First, get the two buffers' addresses. Another checker will have already
1673   // made sure they're not undefined.
1674   DefinedOrUnknownSVal LV = cast<DefinedOrUnknownSVal>(s1Val);
1675   DefinedOrUnknownSVal RV = cast<DefinedOrUnknownSVal>(s2Val);
1676 
1677   // See if they are the same.
1678   SValBuilder &svalBuilder = C.getSValBuilder();
1679   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
1680   ProgramStateRef StSameBuf, StNotSameBuf;
1681   llvm::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
1682 
1683   // If the two arguments might be the same buffer, we know the result is 0,
1684   // and we only need to check one size.
1685   if (StSameBuf) {
1686     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
1687                                     svalBuilder.makeZeroVal(CE->getType()));
1688     C.addTransition(StSameBuf);
1689 
1690     // If the two arguments are GUARANTEED to be the same, we're done!
1691     if (!StNotSameBuf)
1692       return;
1693   }
1694 
1695   assert(StNotSameBuf);
1696   state = StNotSameBuf;
1697 
1698   // At this point we can go about comparing the two buffers.
1699   // For now, we only do this if they're both known string literals.
1700 
1701   // Attempt to extract string literals from both expressions.
1702   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
1703   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
1704   bool canComputeResult = false;
1705 
1706   if (s1StrLiteral && s2StrLiteral) {
1707     StringRef s1StrRef = s1StrLiteral->getString();
1708     StringRef s2StrRef = s2StrLiteral->getString();
1709 
1710     if (isBounded) {
1711       // Get the max number of characters to compare.
1712       const Expr *lenExpr = CE->getArg(2);
1713       SVal lenVal = state->getSVal(lenExpr, LCtx);
1714 
1715       // If the length is known, we can get the right substrings.
1716       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
1717         // Create substrings of each to compare the prefix.
1718         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
1719         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
1720         canComputeResult = true;
1721       }
1722     } else {
1723       // This is a normal, unbounded strcmp.
1724       canComputeResult = true;
1725     }
1726 
1727     if (canComputeResult) {
1728       // Real strcmp stops at null characters.
1729       size_t s1Term = s1StrRef.find('\0');
1730       if (s1Term != StringRef::npos)
1731         s1StrRef = s1StrRef.substr(0, s1Term);
1732 
1733       size_t s2Term = s2StrRef.find('\0');
1734       if (s2Term != StringRef::npos)
1735         s2StrRef = s2StrRef.substr(0, s2Term);
1736 
1737       // Use StringRef's comparison methods to compute the actual result.
1738       int result;
1739 
1740       if (ignoreCase) {
1741         // Compare string 1 to string 2 the same way strcasecmp() does.
1742         result = s1StrRef.compare_lower(s2StrRef);
1743       } else {
1744         // Compare string 1 to string 2 the same way strcmp() does.
1745         result = s1StrRef.compare(s2StrRef);
1746       }
1747 
1748       // Build the SVal of the comparison and bind the return value.
1749       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
1750       state = state->BindExpr(CE, LCtx, resultVal);
1751     }
1752   }
1753 
1754   if (!canComputeResult) {
1755     // Conjure a symbolic value. It's the best we can do.
1756     SVal resultVal = svalBuilder.conjureSymbolVal(0, CE, LCtx, C.blockCount());
1757     state = state->BindExpr(CE, LCtx, resultVal);
1758   }
1759 
1760   // Record this as a possible path.
1761   C.addTransition(state);
1762 }
1763 
1764 //===----------------------------------------------------------------------===//
1765 // The driver method, and other Checker callbacks.
1766 //===----------------------------------------------------------------------===//
1767 
1768 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
1769   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
1770 
1771   if (!FDecl)
1772     return false;
1773 
1774   FnCheck evalFunction = 0;
1775   if (C.isCLibraryFunction(FDecl, "memcpy"))
1776     evalFunction =  &CStringChecker::evalMemcpy;
1777   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
1778     evalFunction =  &CStringChecker::evalMempcpy;
1779   else if (C.isCLibraryFunction(FDecl, "memcmp"))
1780     evalFunction =  &CStringChecker::evalMemcmp;
1781   else if (C.isCLibraryFunction(FDecl, "memmove"))
1782     evalFunction =  &CStringChecker::evalMemmove;
1783   else if (C.isCLibraryFunction(FDecl, "strcpy"))
1784     evalFunction =  &CStringChecker::evalStrcpy;
1785   else if (C.isCLibraryFunction(FDecl, "strncpy"))
1786     evalFunction =  &CStringChecker::evalStrncpy;
1787   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
1788     evalFunction =  &CStringChecker::evalStpcpy;
1789   else if (C.isCLibraryFunction(FDecl, "strcat"))
1790     evalFunction =  &CStringChecker::evalStrcat;
1791   else if (C.isCLibraryFunction(FDecl, "strncat"))
1792     evalFunction =  &CStringChecker::evalStrncat;
1793   else if (C.isCLibraryFunction(FDecl, "strlen"))
1794     evalFunction =  &CStringChecker::evalstrLength;
1795   else if (C.isCLibraryFunction(FDecl, "strnlen"))
1796     evalFunction =  &CStringChecker::evalstrnLength;
1797   else if (C.isCLibraryFunction(FDecl, "strcmp"))
1798     evalFunction =  &CStringChecker::evalStrcmp;
1799   else if (C.isCLibraryFunction(FDecl, "strncmp"))
1800     evalFunction =  &CStringChecker::evalStrncmp;
1801   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
1802     evalFunction =  &CStringChecker::evalStrcasecmp;
1803   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
1804     evalFunction =  &CStringChecker::evalStrncasecmp;
1805   else if (C.isCLibraryFunction(FDecl, "bcopy"))
1806     evalFunction =  &CStringChecker::evalBcopy;
1807   else if (C.isCLibraryFunction(FDecl, "bcmp"))
1808     evalFunction =  &CStringChecker::evalMemcmp;
1809 
1810   // If the callee isn't a string function, let another checker handle it.
1811   if (!evalFunction)
1812     return false;
1813 
1814   // Make sure each function sets its own description.
1815   // (But don't bother in a release build.)
1816   assert(!(CurrentFunctionDescription = NULL));
1817 
1818   // Check and evaluate the call.
1819   (this->*evalFunction)(C, CE);
1820 
1821   // If the evaluate call resulted in no change, chain to the next eval call
1822   // handler.
1823   // Note, the custom CString evaluation calls assume that basic safety
1824   // properties are held. However, if the user chooses to turn off some of these
1825   // checks, we ignore the issues and leave the call evaluation to a generic
1826   // handler.
1827   if (!C.isDifferent())
1828     return false;
1829 
1830   return true;
1831 }
1832 
1833 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
1834   // Record string length for char a[] = "abc";
1835   ProgramStateRef state = C.getState();
1836 
1837   for (DeclStmt::const_decl_iterator I = DS->decl_begin(), E = DS->decl_end();
1838        I != E; ++I) {
1839     const VarDecl *D = dyn_cast<VarDecl>(*I);
1840     if (!D)
1841       continue;
1842 
1843     // FIXME: Handle array fields of structs.
1844     if (!D->getType()->isArrayType())
1845       continue;
1846 
1847     const Expr *Init = D->getInit();
1848     if (!Init)
1849       continue;
1850     if (!isa<StringLiteral>(Init))
1851       continue;
1852 
1853     Loc VarLoc = state->getLValue(D, C.getLocationContext());
1854     const MemRegion *MR = VarLoc.getAsRegion();
1855     if (!MR)
1856       continue;
1857 
1858     SVal StrVal = state->getSVal(Init, C.getLocationContext());
1859     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
1860     DefinedOrUnknownSVal strLength
1861       = cast<DefinedOrUnknownSVal>(getCStringLength(C, state, Init, StrVal));
1862 
1863     state = state->set<CStringLength>(MR, strLength);
1864   }
1865 
1866   C.addTransition(state);
1867 }
1868 
1869 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
1870   CStringLengthTy Entries = state->get<CStringLength>();
1871   return !Entries.isEmpty();
1872 }
1873 
1874 ProgramStateRef
1875 CStringChecker::checkRegionChanges(ProgramStateRef state,
1876                                    const StoreManager::InvalidatedSymbols *,
1877                                    ArrayRef<const MemRegion *> ExplicitRegions,
1878                                    ArrayRef<const MemRegion *> Regions,
1879                                    const CallEvent *Call) const {
1880   CStringLengthTy Entries = state->get<CStringLength>();
1881   if (Entries.isEmpty())
1882     return state;
1883 
1884   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
1885   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
1886 
1887   // First build sets for the changed regions and their super-regions.
1888   for (ArrayRef<const MemRegion *>::iterator
1889        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
1890     const MemRegion *MR = *I;
1891     Invalidated.insert(MR);
1892 
1893     SuperRegions.insert(MR);
1894     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
1895       MR = SR->getSuperRegion();
1896       SuperRegions.insert(MR);
1897     }
1898   }
1899 
1900   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1901 
1902   // Then loop over the entries in the current state.
1903   for (CStringLengthTy::iterator I = Entries.begin(),
1904        E = Entries.end(); I != E; ++I) {
1905     const MemRegion *MR = I.getKey();
1906 
1907     // Is this entry for a super-region of a changed region?
1908     if (SuperRegions.count(MR)) {
1909       Entries = F.remove(Entries, MR);
1910       continue;
1911     }
1912 
1913     // Is this entry for a sub-region of a changed region?
1914     const MemRegion *Super = MR;
1915     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
1916       Super = SR->getSuperRegion();
1917       if (Invalidated.count(Super)) {
1918         Entries = F.remove(Entries, MR);
1919         break;
1920       }
1921     }
1922   }
1923 
1924   return state->set<CStringLength>(Entries);
1925 }
1926 
1927 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
1928                                       SymbolReaper &SR) const {
1929   // Mark all symbols in our string length map as valid.
1930   CStringLengthTy Entries = state->get<CStringLength>();
1931 
1932   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1933        I != E; ++I) {
1934     SVal Len = I.getData();
1935 
1936     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
1937                                   se = Len.symbol_end(); si != se; ++si)
1938       SR.markInUse(*si);
1939   }
1940 }
1941 
1942 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
1943                                       CheckerContext &C) const {
1944   if (!SR.hasDeadSymbols())
1945     return;
1946 
1947   ProgramStateRef state = C.getState();
1948   CStringLengthTy Entries = state->get<CStringLength>();
1949   if (Entries.isEmpty())
1950     return;
1951 
1952   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
1953   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
1954        I != E; ++I) {
1955     SVal Len = I.getData();
1956     if (SymbolRef Sym = Len.getAsSymbol()) {
1957       if (SR.isDead(Sym))
1958         Entries = F.remove(Entries, I.getKey());
1959     }
1960   }
1961 
1962   state = state->set<CStringLength>(Entries);
1963   C.addTransition(state);
1964 }
1965 
1966 #define REGISTER_CHECKER(name) \
1967 void ento::register##name(CheckerManager &mgr) {\
1968   static CStringChecker *TheChecker = 0; \
1969   if (TheChecker == 0) \
1970     TheChecker = mgr.registerChecker<CStringChecker>(); \
1971   TheChecker->Filter.Check##name = true; \
1972 }
1973 
1974 REGISTER_CHECKER(CStringNullArg)
1975 REGISTER_CHECKER(CStringOutOfBounds)
1976 REGISTER_CHECKER(CStringBufferOverlap)
1977 REGISTER_CHECKER(CStringNotNullTerm)
1978 
1979 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
1980   registerCStringNullArg(Mgr);
1981 }
1982