1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file defines RangeConstraintManager, a class that tracks simple
11 //  equality and inequality constraints on symbolic values of ProgramState.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SimpleConstraintManager.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/ADT/FoldingSet.h"
21 #include "llvm/ADT/ImmutableSet.h"
22 #include "llvm/Support/raw_ostream.h"
23 
24 using namespace clang;
25 using namespace ento;
26 
27 namespace { class ConstraintRange {}; }
28 static int ConstraintRangeIndex = 0;
29 
30 /// A Range represents the closed range [from, to].  The caller must
31 /// guarantee that from <= to.  Note that Range is immutable, so as not
32 /// to subvert RangeSet's immutability.
33 namespace {
34 class Range : public std::pair<const llvm::APSInt*,
35                                                 const llvm::APSInt*> {
36 public:
37   Range(const llvm::APSInt &from, const llvm::APSInt &to)
38     : std::pair<const llvm::APSInt*, const llvm::APSInt*>(&from, &to) {
39     assert(from <= to);
40   }
41   bool Includes(const llvm::APSInt &v) const {
42     return *first <= v && v <= *second;
43   }
44   const llvm::APSInt &From() const {
45     return *first;
46   }
47   const llvm::APSInt &To() const {
48     return *second;
49   }
50   const llvm::APSInt *getConcreteValue() const {
51     return &From() == &To() ? &From() : NULL;
52   }
53 
54   void Profile(llvm::FoldingSetNodeID &ID) const {
55     ID.AddPointer(&From());
56     ID.AddPointer(&To());
57   }
58 };
59 
60 
61 class RangeTrait : public llvm::ImutContainerInfo<Range> {
62 public:
63   // When comparing if one Range is less than another, we should compare
64   // the actual APSInt values instead of their pointers.  This keeps the order
65   // consistent (instead of comparing by pointer values) and can potentially
66   // be used to speed up some of the operations in RangeSet.
67   static inline bool isLess(key_type_ref lhs, key_type_ref rhs) {
68     return *lhs.first < *rhs.first || (!(*rhs.first < *lhs.first) &&
69                                        *lhs.second < *rhs.second);
70   }
71 };
72 
73 /// RangeSet contains a set of ranges. If the set is empty, then
74 ///  there the value of a symbol is overly constrained and there are no
75 ///  possible values for that symbol.
76 class RangeSet {
77   typedef llvm::ImmutableSet<Range, RangeTrait> PrimRangeSet;
78   PrimRangeSet ranges; // no need to make const, since it is an
79                        // ImmutableSet - this allows default operator=
80                        // to work.
81 public:
82   typedef PrimRangeSet::Factory Factory;
83   typedef PrimRangeSet::iterator iterator;
84 
85   RangeSet(PrimRangeSet RS) : ranges(RS) {}
86 
87   iterator begin() const { return ranges.begin(); }
88   iterator end() const { return ranges.end(); }
89 
90   bool isEmpty() const { return ranges.isEmpty(); }
91 
92   /// Construct a new RangeSet representing '{ [from, to] }'.
93   RangeSet(Factory &F, const llvm::APSInt &from, const llvm::APSInt &to)
94     : ranges(F.add(F.getEmptySet(), Range(from, to))) {}
95 
96   /// Profile - Generates a hash profile of this RangeSet for use
97   ///  by FoldingSet.
98   void Profile(llvm::FoldingSetNodeID &ID) const { ranges.Profile(ID); }
99 
100   /// getConcreteValue - If a symbol is contrained to equal a specific integer
101   ///  constant then this method returns that value.  Otherwise, it returns
102   ///  NULL.
103   const llvm::APSInt* getConcreteValue() const {
104     return ranges.isSingleton() ? ranges.begin()->getConcreteValue() : 0;
105   }
106 
107 private:
108   void IntersectInRange(BasicValueFactory &BV, Factory &F,
109                         const llvm::APSInt &Lower,
110                         const llvm::APSInt &Upper,
111                         PrimRangeSet &newRanges,
112                         PrimRangeSet::iterator &i,
113                         PrimRangeSet::iterator &e) const {
114     // There are six cases for each range R in the set:
115     //   1. R is entirely before the intersection range.
116     //   2. R is entirely after the intersection range.
117     //   3. R contains the entire intersection range.
118     //   4. R starts before the intersection range and ends in the middle.
119     //   5. R starts in the middle of the intersection range and ends after it.
120     //   6. R is entirely contained in the intersection range.
121     // These correspond to each of the conditions below.
122     for (/* i = begin(), e = end() */; i != e; ++i) {
123       if (i->To() < Lower) {
124         continue;
125       }
126       if (i->From() > Upper) {
127         break;
128       }
129 
130       if (i->Includes(Lower)) {
131         if (i->Includes(Upper)) {
132           newRanges = F.add(newRanges, Range(BV.getValue(Lower),
133                                              BV.getValue(Upper)));
134           break;
135         } else
136           newRanges = F.add(newRanges, Range(BV.getValue(Lower), i->To()));
137       } else {
138         if (i->Includes(Upper)) {
139           newRanges = F.add(newRanges, Range(i->From(), BV.getValue(Upper)));
140           break;
141         } else
142           newRanges = F.add(newRanges, *i);
143       }
144     }
145   }
146 
147   const llvm::APSInt &getMinValue() const {
148     assert(!isEmpty());
149     return ranges.begin()->From();
150   }
151 
152   bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
153     // This function has nine cases, the cartesian product of range-testing
154     // both the upper and lower bounds against the symbol's type.
155     // Each case requires a different pinning operation.
156     // The function returns false if the described range is entirely outside
157     // the range of values for the associated symbol.
158     APSIntType Type(getMinValue());
159     APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower);
160     APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper);
161 
162     switch (LowerTest) {
163     case APSIntType::RTR_Below:
164       switch (UpperTest) {
165       case APSIntType::RTR_Below:
166         // The entire range is outside the symbol's set of possible values.
167         // If this is a conventionally-ordered range, the state is infeasible.
168         if (Lower < Upper)
169           return false;
170 
171         // However, if the range wraps around, it spans all possible values.
172         Lower = Type.getMinValue();
173         Upper = Type.getMaxValue();
174         break;
175       case APSIntType::RTR_Within:
176         // The range starts below what's possible but ends within it. Pin.
177         Lower = Type.getMinValue();
178         Type.apply(Upper);
179         break;
180       case APSIntType::RTR_Above:
181         // The range spans all possible values for the symbol. Pin.
182         Lower = Type.getMinValue();
183         Upper = Type.getMaxValue();
184         break;
185       }
186       break;
187     case APSIntType::RTR_Within:
188       switch (UpperTest) {
189       case APSIntType::RTR_Below:
190         // The range wraps around, but all lower values are not possible.
191         Type.apply(Lower);
192         Upper = Type.getMaxValue();
193         break;
194       case APSIntType::RTR_Within:
195         // The range may or may not wrap around, but both limits are valid.
196         Type.apply(Lower);
197         Type.apply(Upper);
198         break;
199       case APSIntType::RTR_Above:
200         // The range starts within what's possible but ends above it. Pin.
201         Type.apply(Lower);
202         Upper = Type.getMaxValue();
203         break;
204       }
205       break;
206     case APSIntType::RTR_Above:
207       switch (UpperTest) {
208       case APSIntType::RTR_Below:
209         // The range wraps but is outside the symbol's set of possible values.
210         return false;
211       case APSIntType::RTR_Within:
212         // The range starts above what's possible but ends within it (wrap).
213         Lower = Type.getMinValue();
214         Type.apply(Upper);
215         break;
216       case APSIntType::RTR_Above:
217         // The entire range is outside the symbol's set of possible values.
218         // If this is a conventionally-ordered range, the state is infeasible.
219         if (Lower < Upper)
220           return false;
221 
222         // However, if the range wraps around, it spans all possible values.
223         Lower = Type.getMinValue();
224         Upper = Type.getMaxValue();
225         break;
226       }
227       break;
228     }
229 
230     return true;
231   }
232 
233 public:
234   // Returns a set containing the values in the receiving set, intersected with
235   // the closed range [Lower, Upper]. Unlike the Range type, this range uses
236   // modular arithmetic, corresponding to the common treatment of C integer
237   // overflow. Thus, if the Lower bound is greater than the Upper bound, the
238   // range is taken to wrap around. This is equivalent to taking the
239   // intersection with the two ranges [Min, Upper] and [Lower, Max],
240   // or, alternatively, /removing/ all integers between Upper and Lower.
241   RangeSet Intersect(BasicValueFactory &BV, Factory &F,
242                      llvm::APSInt Lower, llvm::APSInt Upper) const {
243     if (!pin(Lower, Upper))
244       return F.getEmptySet();
245 
246     PrimRangeSet newRanges = F.getEmptySet();
247 
248     PrimRangeSet::iterator i = begin(), e = end();
249     if (Lower <= Upper)
250       IntersectInRange(BV, F, Lower, Upper, newRanges, i, e);
251     else {
252       // The order of the next two statements is important!
253       // IntersectInRange() does not reset the iteration state for i and e.
254       // Therefore, the lower range most be handled first.
255       IntersectInRange(BV, F, BV.getMinValue(Upper), Upper, newRanges, i, e);
256       IntersectInRange(BV, F, Lower, BV.getMaxValue(Lower), newRanges, i, e);
257     }
258 
259     return newRanges;
260   }
261 
262   void print(raw_ostream &os) const {
263     bool isFirst = true;
264     os << "{ ";
265     for (iterator i = begin(), e = end(); i != e; ++i) {
266       if (isFirst)
267         isFirst = false;
268       else
269         os << ", ";
270 
271       os << '[' << i->From().toString(10) << ", " << i->To().toString(10)
272          << ']';
273     }
274     os << " }";
275   }
276 
277   bool operator==(const RangeSet &other) const {
278     return ranges == other.ranges;
279   }
280 };
281 } // end anonymous namespace
282 
283 typedef llvm::ImmutableMap<SymbolRef,RangeSet> ConstraintRangeTy;
284 
285 namespace clang {
286 namespace ento {
287 template<>
288 struct ProgramStateTrait<ConstraintRange>
289   : public ProgramStatePartialTrait<ConstraintRangeTy> {
290   static inline void *GDMIndex() { return &ConstraintRangeIndex; }
291 };
292 }
293 }
294 
295 namespace {
296 class RangeConstraintManager : public SimpleConstraintManager{
297   RangeSet GetRange(ProgramStateRef state, SymbolRef sym);
298 public:
299   RangeConstraintManager(SubEngine &subengine, BasicValueFactory &BVF)
300     : SimpleConstraintManager(subengine, BVF) {}
301 
302   ProgramStateRef assumeSymNE(ProgramStateRef state, SymbolRef sym,
303                              const llvm::APSInt& Int,
304                              const llvm::APSInt& Adjustment);
305 
306   ProgramStateRef assumeSymEQ(ProgramStateRef state, SymbolRef sym,
307                              const llvm::APSInt& Int,
308                              const llvm::APSInt& Adjustment);
309 
310   ProgramStateRef assumeSymLT(ProgramStateRef state, SymbolRef sym,
311                              const llvm::APSInt& Int,
312                              const llvm::APSInt& Adjustment);
313 
314   ProgramStateRef assumeSymGT(ProgramStateRef state, SymbolRef sym,
315                              const llvm::APSInt& Int,
316                              const llvm::APSInt& Adjustment);
317 
318   ProgramStateRef assumeSymGE(ProgramStateRef state, SymbolRef sym,
319                              const llvm::APSInt& Int,
320                              const llvm::APSInt& Adjustment);
321 
322   ProgramStateRef assumeSymLE(ProgramStateRef state, SymbolRef sym,
323                              const llvm::APSInt& Int,
324                              const llvm::APSInt& Adjustment);
325 
326   const llvm::APSInt* getSymVal(ProgramStateRef St, SymbolRef sym) const;
327 
328   // FIXME: Refactor into SimpleConstraintManager?
329   bool isEqual(ProgramStateRef St, SymbolRef sym, const llvm::APSInt& V) const {
330     const llvm::APSInt *i = getSymVal(St, sym);
331     return i ? *i == V : false;
332   }
333 
334   ProgramStateRef removeDeadBindings(ProgramStateRef St, SymbolReaper& SymReaper);
335 
336   void print(ProgramStateRef St, raw_ostream &Out,
337              const char* nl, const char *sep);
338 
339 private:
340   RangeSet::Factory F;
341 };
342 
343 } // end anonymous namespace
344 
345 ConstraintManager *
346 ento::CreateRangeConstraintManager(ProgramStateManager &StMgr, SubEngine &Eng) {
347   return new RangeConstraintManager(Eng, StMgr.getBasicVals());
348 }
349 
350 const llvm::APSInt* RangeConstraintManager::getSymVal(ProgramStateRef St,
351                                                       SymbolRef sym) const {
352   const ConstraintRangeTy::data_type *T = St->get<ConstraintRange>(sym);
353   return T ? T->getConcreteValue() : NULL;
354 }
355 
356 /// Scan all symbols referenced by the constraints. If the symbol is not alive
357 /// as marked in LSymbols, mark it as dead in DSymbols.
358 ProgramStateRef
359 RangeConstraintManager::removeDeadBindings(ProgramStateRef state,
360                                            SymbolReaper& SymReaper) {
361 
362   ConstraintRangeTy CR = state->get<ConstraintRange>();
363   ConstraintRangeTy::Factory& CRFactory = state->get_context<ConstraintRange>();
364 
365   for (ConstraintRangeTy::iterator I = CR.begin(), E = CR.end(); I != E; ++I) {
366     SymbolRef sym = I.getKey();
367     if (SymReaper.maybeDead(sym))
368       CR = CRFactory.remove(CR, sym);
369   }
370 
371   return state->set<ConstraintRange>(CR);
372 }
373 
374 RangeSet
375 RangeConstraintManager::GetRange(ProgramStateRef state, SymbolRef sym) {
376   if (ConstraintRangeTy::data_type* V = state->get<ConstraintRange>(sym))
377     return *V;
378 
379   // Lazily generate a new RangeSet representing all possible values for the
380   // given symbol type.
381   BasicValueFactory &BV = getBasicVals();
382   QualType T = sym->getType(BV.getContext());
383 
384   RangeSet Result(F, BV.getMinValue(T), BV.getMaxValue(T));
385 
386   // Special case: references are known to be non-zero.
387   if (T->isReferenceType()) {
388     APSIntType IntType = BV.getAPSIntType(T);
389     Result = Result.Intersect(BV, F, ++IntType.getZeroValue(),
390                                      --IntType.getZeroValue());
391   }
392 
393   return Result;
394 }
395 
396 //===------------------------------------------------------------------------===
397 // assumeSymX methods: public interface for RangeConstraintManager.
398 //===------------------------------------------------------------------------===/
399 
400 // The syntax for ranges below is mathematical, using [x, y] for closed ranges
401 // and (x, y) for open ranges. These ranges are modular, corresponding with
402 // a common treatment of C integer overflow. This means that these methods
403 // do not have to worry about overflow; RangeSet::Intersect can handle such a
404 // "wraparound" range.
405 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
406 // UINT_MAX, 0, 1, and 2.
407 
408 ProgramStateRef
409 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
410                                     const llvm::APSInt &Int,
411                                     const llvm::APSInt &Adjustment) {
412   // Before we do any real work, see if the value can even show up.
413   APSIntType AdjustmentType(Adjustment);
414   if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
415     return St;
416 
417   llvm::APSInt Lower = AdjustmentType.convert(Int) - Adjustment;
418   llvm::APSInt Upper = Lower;
419   --Lower;
420   ++Upper;
421 
422   // [Int-Adjustment+1, Int-Adjustment-1]
423   // Notice that the lower bound is greater than the upper bound.
424   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Upper, Lower);
425   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
426 }
427 
428 ProgramStateRef
429 RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
430                                     const llvm::APSInt &Int,
431                                     const llvm::APSInt &Adjustment) {
432   // Before we do any real work, see if the value can even show up.
433   APSIntType AdjustmentType(Adjustment);
434   if (AdjustmentType.testInRange(Int) != APSIntType::RTR_Within)
435     return NULL;
436 
437   // [Int-Adjustment, Int-Adjustment]
438   llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
439   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, AdjInt, AdjInt);
440   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
441 }
442 
443 ProgramStateRef
444 RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
445                                     const llvm::APSInt &Int,
446                                     const llvm::APSInt &Adjustment) {
447   // Before we do any real work, see if the value can even show up.
448   APSIntType AdjustmentType(Adjustment);
449   switch (AdjustmentType.testInRange(Int)) {
450   case APSIntType::RTR_Below:
451     return NULL;
452   case APSIntType::RTR_Within:
453     break;
454   case APSIntType::RTR_Above:
455     return St;
456   }
457 
458   // Special case for Int == Min. This is always false.
459   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
460   llvm::APSInt Min = AdjustmentType.getMinValue();
461   if (ComparisonVal == Min)
462     return NULL;
463 
464   llvm::APSInt Lower = Min-Adjustment;
465   llvm::APSInt Upper = ComparisonVal-Adjustment;
466   --Upper;
467 
468   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
469   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
470 }
471 
472 ProgramStateRef
473 RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
474                                     const llvm::APSInt &Int,
475                                     const llvm::APSInt &Adjustment) {
476   // Before we do any real work, see if the value can even show up.
477   APSIntType AdjustmentType(Adjustment);
478   switch (AdjustmentType.testInRange(Int)) {
479   case APSIntType::RTR_Below:
480     return St;
481   case APSIntType::RTR_Within:
482     break;
483   case APSIntType::RTR_Above:
484     return NULL;
485   }
486 
487   // Special case for Int == Max. This is always false.
488   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
489   llvm::APSInt Max = AdjustmentType.getMaxValue();
490   if (ComparisonVal == Max)
491     return NULL;
492 
493   llvm::APSInt Lower = ComparisonVal-Adjustment;
494   llvm::APSInt Upper = Max-Adjustment;
495   ++Lower;
496 
497   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
498   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
499 }
500 
501 ProgramStateRef
502 RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
503                                     const llvm::APSInt &Int,
504                                     const llvm::APSInt &Adjustment) {
505   // Before we do any real work, see if the value can even show up.
506   APSIntType AdjustmentType(Adjustment);
507   switch (AdjustmentType.testInRange(Int)) {
508   case APSIntType::RTR_Below:
509     return St;
510   case APSIntType::RTR_Within:
511     break;
512   case APSIntType::RTR_Above:
513     return NULL;
514   }
515 
516   // Special case for Int == Min. This is always feasible.
517   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
518   llvm::APSInt Min = AdjustmentType.getMinValue();
519   if (ComparisonVal == Min)
520     return St;
521 
522   llvm::APSInt Max = AdjustmentType.getMaxValue();
523   llvm::APSInt Lower = ComparisonVal-Adjustment;
524   llvm::APSInt Upper = Max-Adjustment;
525 
526   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
527   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
528 }
529 
530 ProgramStateRef
531 RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
532                                     const llvm::APSInt &Int,
533                                     const llvm::APSInt &Adjustment) {
534   // Before we do any real work, see if the value can even show up.
535   APSIntType AdjustmentType(Adjustment);
536   switch (AdjustmentType.testInRange(Int)) {
537   case APSIntType::RTR_Below:
538     return NULL;
539   case APSIntType::RTR_Within:
540     break;
541   case APSIntType::RTR_Above:
542     return St;
543   }
544 
545   // Special case for Int == Max. This is always feasible.
546   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
547   llvm::APSInt Max = AdjustmentType.getMaxValue();
548   if (ComparisonVal == Max)
549     return St;
550 
551   llvm::APSInt Min = AdjustmentType.getMinValue();
552   llvm::APSInt Lower = Min-Adjustment;
553   llvm::APSInt Upper = ComparisonVal-Adjustment;
554 
555   RangeSet New = GetRange(St, Sym).Intersect(getBasicVals(), F, Lower, Upper);
556   return New.isEmpty() ? NULL : St->set<ConstraintRange>(Sym, New);
557 }
558 
559 //===------------------------------------------------------------------------===
560 // Pretty-printing.
561 //===------------------------------------------------------------------------===/
562 
563 void RangeConstraintManager::print(ProgramStateRef St, raw_ostream &Out,
564                                    const char* nl, const char *sep) {
565 
566   ConstraintRangeTy Ranges = St->get<ConstraintRange>();
567 
568   if (Ranges.isEmpty()) {
569     Out << nl << sep << "Ranges are empty." << nl;
570     return;
571   }
572 
573   Out << nl << sep << "Ranges of symbol values:";
574   for (ConstraintRangeTy::iterator I=Ranges.begin(), E=Ranges.end(); I!=E; ++I){
575     Out << nl << ' ' << I.getKey() << " : ";
576     I.getData().print(Out);
577   }
578   Out << nl;
579 }
580