1 // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
14 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
18 
19 using namespace clang;
20 using namespace ento;
21 
22 namespace {
23 class SimpleSValBuilder : public SValBuilder {
24 
25   // With one `simplifySValOnce` call, a compound symbols might collapse to
26   // simpler symbol tree that is still possible to further simplify. Thus, we
27   // do the simplification on a new symbol tree until we reach the simplest
28   // form, i.e. the fixpoint.
29   // Consider the following symbol `(b * b) * b * b` which has this tree:
30   //       *
31   //      / \
32   //     *   b
33   //    /  \
34   //   /    b
35   // (b * b)
36   // Now, if the `b * b == 1` new constraint is added then during the first
37   // iteration we have the following transformations:
38   //       *                  *
39   //      / \                / \
40   //     *   b     -->      b   b
41   //    /  \
42   //   /    b
43   //  1
44   // We need another iteration to reach the final result `1`.
45   SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
46 
47   // Recursively descends into symbolic expressions and replaces symbols
48   // with their known values (in the sense of the getKnownValue() method).
49   // We traverse the symbol tree and query the constraint values for the
50   // sub-trees and if a value is a constant we do the constant folding.
51   SVal simplifySValOnce(ProgramStateRef State, SVal V);
52 
53 public:
54   SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
55                     ProgramStateManager &stateMgr)
56       : SValBuilder(alloc, context, stateMgr) {}
57   ~SimpleSValBuilder() override {}
58 
59   SVal evalMinus(NonLoc val) override;
60   SVal evalComplement(NonLoc val) override;
61   SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
62                    NonLoc lhs, NonLoc rhs, QualType resultTy) override;
63   SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
64                    Loc lhs, Loc rhs, QualType resultTy) override;
65   SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
66                    Loc lhs, NonLoc rhs, QualType resultTy) override;
67 
68   /// getKnownValue - evaluates a given SVal. If the SVal has only one possible
69   ///  (integer) value, that value is returned. Otherwise, returns NULL.
70   const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
71 
72   SVal simplifySVal(ProgramStateRef State, SVal V) override;
73 
74   SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
75                      const llvm::APSInt &RHS, QualType resultTy);
76 };
77 } // end anonymous namespace
78 
79 SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
80                                            ASTContext &context,
81                                            ProgramStateManager &stateMgr) {
82   return new SimpleSValBuilder(alloc, context, stateMgr);
83 }
84 
85 //===----------------------------------------------------------------------===//
86 // Transfer function for unary operators.
87 //===----------------------------------------------------------------------===//
88 
89 SVal SimpleSValBuilder::evalMinus(NonLoc val) {
90   switch (val.getSubKind()) {
91   case nonloc::ConcreteIntKind:
92     return val.castAs<nonloc::ConcreteInt>().evalMinus(*this);
93   default:
94     return UnknownVal();
95   }
96 }
97 
98 SVal SimpleSValBuilder::evalComplement(NonLoc X) {
99   switch (X.getSubKind()) {
100   case nonloc::ConcreteIntKind:
101     return X.castAs<nonloc::ConcreteInt>().evalComplement(*this);
102   default:
103     return UnknownVal();
104   }
105 }
106 
107 //===----------------------------------------------------------------------===//
108 // Transfer function for binary operators.
109 //===----------------------------------------------------------------------===//
110 
111 SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
112                                     BinaryOperator::Opcode op,
113                                     const llvm::APSInt &RHS,
114                                     QualType resultTy) {
115   bool isIdempotent = false;
116 
117   // Check for a few special cases with known reductions first.
118   switch (op) {
119   default:
120     // We can't reduce this case; just treat it normally.
121     break;
122   case BO_Mul:
123     // a*0 and a*1
124     if (RHS == 0)
125       return makeIntVal(0, resultTy);
126     else if (RHS == 1)
127       isIdempotent = true;
128     break;
129   case BO_Div:
130     // a/0 and a/1
131     if (RHS == 0)
132       // This is also handled elsewhere.
133       return UndefinedVal();
134     else if (RHS == 1)
135       isIdempotent = true;
136     break;
137   case BO_Rem:
138     // a%0 and a%1
139     if (RHS == 0)
140       // This is also handled elsewhere.
141       return UndefinedVal();
142     else if (RHS == 1)
143       return makeIntVal(0, resultTy);
144     break;
145   case BO_Add:
146   case BO_Sub:
147   case BO_Shl:
148   case BO_Shr:
149   case BO_Xor:
150     // a+0, a-0, a<<0, a>>0, a^0
151     if (RHS == 0)
152       isIdempotent = true;
153     break;
154   case BO_And:
155     // a&0 and a&(~0)
156     if (RHS == 0)
157       return makeIntVal(0, resultTy);
158     else if (RHS.isAllOnes())
159       isIdempotent = true;
160     break;
161   case BO_Or:
162     // a|0 and a|(~0)
163     if (RHS == 0)
164       isIdempotent = true;
165     else if (RHS.isAllOnes()) {
166       const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
167       return nonloc::ConcreteInt(Result);
168     }
169     break;
170   }
171 
172   // Idempotent ops (like a*1) can still change the type of an expression.
173   // Wrap the LHS up in a NonLoc again and let evalCast do the
174   // dirty work.
175   if (isIdempotent)
176     return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
177 
178   // If we reach this point, the expression cannot be simplified.
179   // Make a SymbolVal for the entire expression, after converting the RHS.
180   const llvm::APSInt *ConvertedRHS = &RHS;
181   if (BinaryOperator::isComparisonOp(op)) {
182     // We're looking for a type big enough to compare the symbolic value
183     // with the given constant.
184     // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
185     ASTContext &Ctx = getContext();
186     QualType SymbolType = LHS->getType();
187     uint64_t ValWidth = RHS.getBitWidth();
188     uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
189 
190     if (ValWidth < TypeWidth) {
191       // If the value is too small, extend it.
192       ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
193     } else if (ValWidth == TypeWidth) {
194       // If the value is signed but the symbol is unsigned, do the comparison
195       // in unsigned space. [C99 6.3.1.8]
196       // (For the opposite case, the value is already unsigned.)
197       if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
198         ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
199     }
200   } else
201     ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
202 
203   return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
204 }
205 
206 // See if Sym is known to be a relation Rel with Bound.
207 static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym,
208                          llvm::APSInt Bound, ProgramStateRef State) {
209   SValBuilder &SVB = State->getStateManager().getSValBuilder();
210   SVal Result =
211       SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
212                       nonloc::ConcreteInt(Bound), SVB.getConditionType());
213   if (auto DV = Result.getAs<DefinedSVal>()) {
214     return !State->assume(*DV, false);
215   }
216   return false;
217 }
218 
219 // See if Sym is known to be within [min/4, max/4], where min and max
220 // are the bounds of the symbol's integral type. With such symbols,
221 // some manipulations can be performed without the risk of overflow.
222 // assume() doesn't cause infinite recursion because we should be dealing
223 // with simpler symbols on every recursive call.
224 static bool isWithinConstantOverflowBounds(SymbolRef Sym,
225                                            ProgramStateRef State) {
226   SValBuilder &SVB = State->getStateManager().getSValBuilder();
227   BasicValueFactory &BV = SVB.getBasicValueFactory();
228 
229   QualType T = Sym->getType();
230   assert(T->isSignedIntegerOrEnumerationType() &&
231          "This only works with signed integers!");
232   APSIntType AT = BV.getAPSIntType(T);
233 
234   llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
235   return isInRelation(BO_LE, Sym, Max, State) &&
236          isInRelation(BO_GE, Sym, Min, State);
237 }
238 
239 // Same for the concrete integers: see if I is within [min/4, max/4].
240 static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
241   APSIntType AT(I);
242   assert(!AT.isUnsigned() &&
243          "This only works with signed integers!");
244 
245   llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
246   return (I <= Max) && (I >= -Max);
247 }
248 
249 static std::pair<SymbolRef, llvm::APSInt>
250 decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) {
251   if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
252     if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
253       return std::make_pair(SymInt->getLHS(),
254                             (SymInt->getOpcode() == BO_Add) ?
255                             (SymInt->getRHS()) :
256                             (-SymInt->getRHS()));
257 
258   // Fail to decompose: "reduce" the problem to the "$x + 0" case.
259   return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
260 }
261 
262 // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
263 // same signed integral type and no overflows occur (which should be checked
264 // by the caller).
265 static NonLoc doRearrangeUnchecked(ProgramStateRef State,
266                                    BinaryOperator::Opcode Op,
267                                    SymbolRef LSym, llvm::APSInt LInt,
268                                    SymbolRef RSym, llvm::APSInt RInt) {
269   SValBuilder &SVB = State->getStateManager().getSValBuilder();
270   BasicValueFactory &BV = SVB.getBasicValueFactory();
271   SymbolManager &SymMgr = SVB.getSymbolManager();
272 
273   QualType SymTy = LSym->getType();
274   assert(SymTy == RSym->getType() &&
275          "Symbols are not of the same type!");
276   assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
277          "Integers are not of the same type as symbols!");
278   assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
279          "Integers are not of the same type as symbols!");
280 
281   QualType ResultTy;
282   if (BinaryOperator::isComparisonOp(Op))
283     ResultTy = SVB.getConditionType();
284   else if (BinaryOperator::isAdditiveOp(Op))
285     ResultTy = SymTy;
286   else
287     llvm_unreachable("Operation not suitable for unchecked rearrangement!");
288 
289   // FIXME: Can we use assume() without getting into an infinite recursion?
290   if (LSym == RSym)
291     return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
292                            nonloc::ConcreteInt(RInt), ResultTy)
293         .castAs<NonLoc>();
294 
295   SymbolRef ResultSym = nullptr;
296   BinaryOperator::Opcode ResultOp;
297   llvm::APSInt ResultInt;
298   if (BinaryOperator::isComparisonOp(Op)) {
299     // Prefer comparing to a non-negative number.
300     // FIXME: Maybe it'd be better to have consistency in
301     // "$x - $y" vs. "$y - $x" because those are solver's keys.
302     if (LInt > RInt) {
303       ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
304       ResultOp = BinaryOperator::reverseComparisonOp(Op);
305       ResultInt = LInt - RInt; // Opposite order!
306     } else {
307       ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
308       ResultOp = Op;
309       ResultInt = RInt - LInt; // Opposite order!
310     }
311   } else {
312     ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
313     ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
314     ResultOp = BO_Add;
315     // Bring back the cosmetic difference.
316     if (ResultInt < 0) {
317       ResultInt = -ResultInt;
318       ResultOp = BO_Sub;
319     } else if (ResultInt == 0) {
320       // Shortcut: Simplify "$x + 0" to "$x".
321       return nonloc::SymbolVal(ResultSym);
322     }
323   }
324   const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
325   return nonloc::SymbolVal(
326       SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
327 }
328 
329 // Rearrange if symbol type matches the result type and if the operator is a
330 // comparison operator, both symbol and constant must be within constant
331 // overflow bounds.
332 static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op,
333                             SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
334   return Sym->getType() == Ty &&
335     (!BinaryOperator::isComparisonOp(Op) ||
336      (isWithinConstantOverflowBounds(Sym, State) &&
337       isWithinConstantOverflowBounds(Int)));
338 }
339 
340 static Optional<NonLoc> tryRearrange(ProgramStateRef State,
341                                      BinaryOperator::Opcode Op, NonLoc Lhs,
342                                      NonLoc Rhs, QualType ResultTy) {
343   ProgramStateManager &StateMgr = State->getStateManager();
344   SValBuilder &SVB = StateMgr.getSValBuilder();
345 
346   // We expect everything to be of the same type - this type.
347   QualType SingleTy;
348 
349   // FIXME: After putting complexity threshold to the symbols we can always
350   //        rearrange additive operations but rearrange comparisons only if
351   //        option is set.
352   if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
353     return None;
354 
355   SymbolRef LSym = Lhs.getAsSymbol();
356   if (!LSym)
357     return None;
358 
359   if (BinaryOperator::isComparisonOp(Op)) {
360     SingleTy = LSym->getType();
361     if (ResultTy != SVB.getConditionType())
362       return None;
363     // Initialize SingleTy later with a symbol's type.
364   } else if (BinaryOperator::isAdditiveOp(Op)) {
365     SingleTy = ResultTy;
366     if (LSym->getType() != SingleTy)
367       return None;
368   } else {
369     // Don't rearrange other operations.
370     return None;
371   }
372 
373   assert(!SingleTy.isNull() && "We should have figured out the type by now!");
374 
375   // Rearrange signed symbolic expressions only
376   if (!SingleTy->isSignedIntegerOrEnumerationType())
377     return None;
378 
379   SymbolRef RSym = Rhs.getAsSymbol();
380   if (!RSym || RSym->getType() != SingleTy)
381     return None;
382 
383   BasicValueFactory &BV = State->getBasicVals();
384   llvm::APSInt LInt, RInt;
385   std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
386   std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
387   if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
388       !shouldRearrange(State, Op, RSym, RInt, SingleTy))
389     return None;
390 
391   // We know that no overflows can occur anymore.
392   return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
393 }
394 
395 SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
396                                   BinaryOperator::Opcode op,
397                                   NonLoc lhs, NonLoc rhs,
398                                   QualType resultTy)  {
399   NonLoc InputLHS = lhs;
400   NonLoc InputRHS = rhs;
401 
402   // Constraints may have changed since the creation of a bound SVal. Check if
403   // the values can be simplified based on those new constraints.
404   SVal simplifiedLhs = simplifySVal(state, lhs);
405   SVal simplifiedRhs = simplifySVal(state, rhs);
406   if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
407     lhs = *simplifiedLhsAsNonLoc;
408   if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
409     rhs = *simplifiedRhsAsNonLoc;
410 
411   // Handle trivial case where left-side and right-side are the same.
412   if (lhs == rhs)
413     switch (op) {
414       default:
415         break;
416       case BO_EQ:
417       case BO_LE:
418       case BO_GE:
419         return makeTruthVal(true, resultTy);
420       case BO_LT:
421       case BO_GT:
422       case BO_NE:
423         return makeTruthVal(false, resultTy);
424       case BO_Xor:
425       case BO_Sub:
426         if (resultTy->isIntegralOrEnumerationType())
427           return makeIntVal(0, resultTy);
428         return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
429                         QualType{});
430       case BO_Or:
431       case BO_And:
432         return evalCast(lhs, resultTy, QualType{});
433     }
434 
435   while (true) {
436     switch (lhs.getSubKind()) {
437     default:
438       return makeSymExprValNN(op, lhs, rhs, resultTy);
439     case nonloc::PointerToMemberKind: {
440       assert(rhs.getSubKind() == nonloc::PointerToMemberKind &&
441              "Both SVals should have pointer-to-member-type");
442       auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
443            RPTM = rhs.castAs<nonloc::PointerToMember>();
444       auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
445       switch (op) {
446         case BO_EQ:
447           return makeTruthVal(LPTMD == RPTMD, resultTy);
448         case BO_NE:
449           return makeTruthVal(LPTMD != RPTMD, resultTy);
450         default:
451           return UnknownVal();
452       }
453     }
454     case nonloc::LocAsIntegerKind: {
455       Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
456       switch (rhs.getSubKind()) {
457         case nonloc::LocAsIntegerKind:
458           // FIXME: at the moment the implementation
459           // of modeling "pointers as integers" is not complete.
460           if (!BinaryOperator::isComparisonOp(op))
461             return UnknownVal();
462           return evalBinOpLL(state, op, lhsL,
463                              rhs.castAs<nonloc::LocAsInteger>().getLoc(),
464                              resultTy);
465         case nonloc::ConcreteIntKind: {
466           // FIXME: at the moment the implementation
467           // of modeling "pointers as integers" is not complete.
468           if (!BinaryOperator::isComparisonOp(op))
469             return UnknownVal();
470           // Transform the integer into a location and compare.
471           // FIXME: This only makes sense for comparisons. If we want to, say,
472           // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
473           // then pack it back into a LocAsInteger.
474           llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
475           // If the region has a symbolic base, pay attention to the type; it
476           // might be coming from a non-default address space. For non-symbolic
477           // regions it doesn't matter that much because such comparisons would
478           // most likely evaluate to concrete false anyway. FIXME: We might
479           // still need to handle the non-comparison case.
480           if (SymbolRef lSym = lhs.getAsLocSymbol(true))
481             BasicVals.getAPSIntType(lSym->getType()).apply(i);
482           else
483             BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
484           return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
485         }
486         default:
487           switch (op) {
488             case BO_EQ:
489               return makeTruthVal(false, resultTy);
490             case BO_NE:
491               return makeTruthVal(true, resultTy);
492             default:
493               // This case also handles pointer arithmetic.
494               return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
495           }
496       }
497     }
498     case nonloc::ConcreteIntKind: {
499       llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
500 
501       // If we're dealing with two known constants, just perform the operation.
502       if (const llvm::APSInt *KnownRHSValue = getKnownValue(state, rhs)) {
503         llvm::APSInt RHSValue = *KnownRHSValue;
504         if (BinaryOperator::isComparisonOp(op)) {
505           // We're looking for a type big enough to compare the two values.
506           // FIXME: This is not correct. char + short will result in a promotion
507           // to int. Unfortunately we have lost types by this point.
508           APSIntType CompareType = std::max(APSIntType(LHSValue),
509                                             APSIntType(RHSValue));
510           CompareType.apply(LHSValue);
511           CompareType.apply(RHSValue);
512         } else if (!BinaryOperator::isShiftOp(op)) {
513           APSIntType IntType = BasicVals.getAPSIntType(resultTy);
514           IntType.apply(LHSValue);
515           IntType.apply(RHSValue);
516         }
517 
518         const llvm::APSInt *Result =
519           BasicVals.evalAPSInt(op, LHSValue, RHSValue);
520         if (!Result)
521           return UndefinedVal();
522 
523         return nonloc::ConcreteInt(*Result);
524       }
525 
526       // Swap the left and right sides and flip the operator if doing so
527       // allows us to better reason about the expression (this is a form
528       // of expression canonicalization).
529       // While we're at it, catch some special cases for non-commutative ops.
530       switch (op) {
531       case BO_LT:
532       case BO_GT:
533       case BO_LE:
534       case BO_GE:
535         op = BinaryOperator::reverseComparisonOp(op);
536         LLVM_FALLTHROUGH;
537       case BO_EQ:
538       case BO_NE:
539       case BO_Add:
540       case BO_Mul:
541       case BO_And:
542       case BO_Xor:
543       case BO_Or:
544         std::swap(lhs, rhs);
545         continue;
546       case BO_Shr:
547         // (~0)>>a
548         if (LHSValue.isAllOnes() && LHSValue.isSigned())
549           return evalCast(lhs, resultTy, QualType{});
550         LLVM_FALLTHROUGH;
551       case BO_Shl:
552         // 0<<a and 0>>a
553         if (LHSValue == 0)
554           return evalCast(lhs, resultTy, QualType{});
555         return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
556       case BO_Div:
557         // 0 / x == 0
558       case BO_Rem:
559         // 0 % x == 0
560         if (LHSValue == 0)
561           return makeZeroVal(resultTy);
562         LLVM_FALLTHROUGH;
563       default:
564         return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
565       }
566     }
567     case nonloc::SymbolValKind: {
568       // We only handle LHS as simple symbols or SymIntExprs.
569       SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
570 
571       // LHS is a symbolic expression.
572       if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
573 
574         // Is this a logical not? (!x is represented as x == 0.)
575         if (op == BO_EQ && rhs.isZeroConstant()) {
576           // We know how to negate certain expressions. Simplify them here.
577 
578           BinaryOperator::Opcode opc = symIntExpr->getOpcode();
579           switch (opc) {
580           default:
581             // We don't know how to negate this operation.
582             // Just handle it as if it were a normal comparison to 0.
583             break;
584           case BO_LAnd:
585           case BO_LOr:
586             llvm_unreachable("Logical operators handled by branching logic.");
587           case BO_Assign:
588           case BO_MulAssign:
589           case BO_DivAssign:
590           case BO_RemAssign:
591           case BO_AddAssign:
592           case BO_SubAssign:
593           case BO_ShlAssign:
594           case BO_ShrAssign:
595           case BO_AndAssign:
596           case BO_XorAssign:
597           case BO_OrAssign:
598           case BO_Comma:
599             llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
600           case BO_PtrMemD:
601           case BO_PtrMemI:
602             llvm_unreachable("Pointer arithmetic not handled here.");
603           case BO_LT:
604           case BO_GT:
605           case BO_LE:
606           case BO_GE:
607           case BO_EQ:
608           case BO_NE:
609             assert(resultTy->isBooleanType() ||
610                    resultTy == getConditionType());
611             assert(symIntExpr->getType()->isBooleanType() ||
612                    getContext().hasSameUnqualifiedType(symIntExpr->getType(),
613                                                        getConditionType()));
614             // Negate the comparison and make a value.
615             opc = BinaryOperator::negateComparisonOp(opc);
616             return makeNonLoc(symIntExpr->getLHS(), opc,
617                 symIntExpr->getRHS(), resultTy);
618           }
619         }
620 
621         // For now, only handle expressions whose RHS is a constant.
622         if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs)) {
623           // If both the LHS and the current expression are additive,
624           // fold their constants and try again.
625           if (BinaryOperator::isAdditiveOp(op)) {
626             BinaryOperator::Opcode lop = symIntExpr->getOpcode();
627             if (BinaryOperator::isAdditiveOp(lop)) {
628               // Convert the two constants to a common type, then combine them.
629 
630               // resultTy may not be the best type to convert to, but it's
631               // probably the best choice in expressions with mixed type
632               // (such as x+1U+2LL). The rules for implicit conversions should
633               // choose a reasonable type to preserve the expression, and will
634               // at least match how the value is going to be used.
635               APSIntType IntType = BasicVals.getAPSIntType(resultTy);
636               const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
637               const llvm::APSInt &second = IntType.convert(*RHSValue);
638 
639               const llvm::APSInt *newRHS;
640               if (lop == op)
641                 newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
642               else
643                 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
644 
645               assert(newRHS && "Invalid operation despite common type!");
646               rhs = nonloc::ConcreteInt(*newRHS);
647               lhs = nonloc::SymbolVal(symIntExpr->getLHS());
648               op = lop;
649               continue;
650             }
651           }
652 
653           // Otherwise, make a SymIntExpr out of the expression.
654           return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
655         }
656       }
657 
658       // Is the RHS a constant?
659       if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs))
660         return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
661 
662       if (Optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
663         return *V;
664 
665       // Give up -- this is not a symbolic expression we can handle.
666       return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
667     }
668     }
669   }
670 }
671 
672 static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR,
673                                             const FieldRegion *RightFR,
674                                             BinaryOperator::Opcode op,
675                                             QualType resultTy,
676                                             SimpleSValBuilder &SVB) {
677   // Only comparisons are meaningful here!
678   if (!BinaryOperator::isComparisonOp(op))
679     return UnknownVal();
680 
681   // Next, see if the two FRs have the same super-region.
682   // FIXME: This doesn't handle casts yet, and simply stripping the casts
683   // doesn't help.
684   if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
685     return UnknownVal();
686 
687   const FieldDecl *LeftFD = LeftFR->getDecl();
688   const FieldDecl *RightFD = RightFR->getDecl();
689   const RecordDecl *RD = LeftFD->getParent();
690 
691   // Make sure the two FRs are from the same kind of record. Just in case!
692   // FIXME: This is probably where inheritance would be a problem.
693   if (RD != RightFD->getParent())
694     return UnknownVal();
695 
696   // We know for sure that the two fields are not the same, since that
697   // would have given us the same SVal.
698   if (op == BO_EQ)
699     return SVB.makeTruthVal(false, resultTy);
700   if (op == BO_NE)
701     return SVB.makeTruthVal(true, resultTy);
702 
703   // Iterate through the fields and see which one comes first.
704   // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
705   // members and the units in which bit-fields reside have addresses that
706   // increase in the order in which they are declared."
707   bool leftFirst = (op == BO_LT || op == BO_LE);
708   for (const auto *I : RD->fields()) {
709     if (I == LeftFD)
710       return SVB.makeTruthVal(leftFirst, resultTy);
711     if (I == RightFD)
712       return SVB.makeTruthVal(!leftFirst, resultTy);
713   }
714 
715   llvm_unreachable("Fields not found in parent record's definition");
716 }
717 
718 // This is used in debug builds only for now because some downstream users
719 // may hit this assert in their subsequent merges.
720 // There are still places in the analyzer where equal bitwidth Locs
721 // are compared, and need to be found and corrected. Recent previous fixes have
722 // addressed the known problems of making NULLs with specific bitwidths
723 // for Loc comparisons along with deprecation of APIs for the same purpose.
724 //
725 static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
726                                  Loc LhsLoc) {
727   // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
728   ASTContext &Ctx = State->getStateManager().getContext();
729   uint64_t RhsBitwidth =
730       RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx));
731   uint64_t LhsBitwidth =
732       LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx));
733   if (RhsBitwidth && LhsBitwidth &&
734       (LhsLoc.getSubKind() == RhsLoc.getSubKind())) {
735     assert(RhsBitwidth == LhsBitwidth &&
736            "RhsLoc and LhsLoc bitwidth must be same!");
737   }
738 }
739 
740 // FIXME: all this logic will change if/when we have MemRegion::getLocation().
741 SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
742                                   BinaryOperator::Opcode op,
743                                   Loc lhs, Loc rhs,
744                                   QualType resultTy) {
745 
746   // Assert that bitwidth of lhs and rhs are the same.
747   // This can happen if two different address spaces are used,
748   // and the bitwidths of the address spaces are different.
749   // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
750   // FIXME: See comment above in the function assertEqualBitWidths
751   assertEqualBitWidths(state, rhs, lhs);
752 
753   // Only comparisons and subtractions are valid operations on two pointers.
754   // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
755   // However, if a pointer is casted to an integer, evalBinOpNN may end up
756   // calling this function with another operation (PR7527). We don't attempt to
757   // model this for now, but it could be useful, particularly when the
758   // "location" is actually an integer value that's been passed through a void*.
759   if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
760     return UnknownVal();
761 
762   // Special cases for when both sides are identical.
763   if (lhs == rhs) {
764     switch (op) {
765     default:
766       llvm_unreachable("Unimplemented operation for two identical values");
767     case BO_Sub:
768       return makeZeroVal(resultTy);
769     case BO_EQ:
770     case BO_LE:
771     case BO_GE:
772       return makeTruthVal(true, resultTy);
773     case BO_NE:
774     case BO_LT:
775     case BO_GT:
776       return makeTruthVal(false, resultTy);
777     }
778   }
779 
780   switch (lhs.getSubKind()) {
781   default:
782     llvm_unreachable("Ordering not implemented for this Loc.");
783 
784   case loc::GotoLabelKind:
785     // The only thing we know about labels is that they're non-null.
786     if (rhs.isZeroConstant()) {
787       switch (op) {
788       default:
789         break;
790       case BO_Sub:
791         return evalCast(lhs, resultTy, QualType{});
792       case BO_EQ:
793       case BO_LE:
794       case BO_LT:
795         return makeTruthVal(false, resultTy);
796       case BO_NE:
797       case BO_GT:
798       case BO_GE:
799         return makeTruthVal(true, resultTy);
800       }
801     }
802     // There may be two labels for the same location, and a function region may
803     // have the same address as a label at the start of the function (depending
804     // on the ABI).
805     // FIXME: we can probably do a comparison against other MemRegions, though.
806     // FIXME: is there a way to tell if two labels refer to the same location?
807     return UnknownVal();
808 
809   case loc::ConcreteIntKind: {
810     // If one of the operands is a symbol and the other is a constant,
811     // build an expression for use by the constraint manager.
812     if (SymbolRef rSym = rhs.getAsLocSymbol()) {
813       // We can only build expressions with symbols on the left,
814       // so we need a reversible operator.
815       if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
816         return UnknownVal();
817 
818       const llvm::APSInt &lVal = lhs.castAs<loc::ConcreteInt>().getValue();
819       op = BinaryOperator::reverseComparisonOp(op);
820       return makeNonLoc(rSym, op, lVal, resultTy);
821     }
822 
823     // If both operands are constants, just perform the operation.
824     if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
825       SVal ResultVal =
826           lhs.castAs<loc::ConcreteInt>().evalBinOp(BasicVals, op, *rInt);
827       if (Optional<NonLoc> Result = ResultVal.getAs<NonLoc>())
828         return evalCast(*Result, resultTy, QualType{});
829 
830       assert(!ResultVal.getAs<Loc>() && "Loc-Loc ops should not produce Locs");
831       return UnknownVal();
832     }
833 
834     // Special case comparisons against NULL.
835     // This must come after the test if the RHS is a symbol, which is used to
836     // build constraints. The address of any non-symbolic region is guaranteed
837     // to be non-NULL, as is any label.
838     assert(rhs.getAs<loc::MemRegionVal>() || rhs.getAs<loc::GotoLabel>());
839     if (lhs.isZeroConstant()) {
840       switch (op) {
841       default:
842         break;
843       case BO_EQ:
844       case BO_GT:
845       case BO_GE:
846         return makeTruthVal(false, resultTy);
847       case BO_NE:
848       case BO_LT:
849       case BO_LE:
850         return makeTruthVal(true, resultTy);
851       }
852     }
853 
854     // Comparing an arbitrary integer to a region or label address is
855     // completely unknowable.
856     return UnknownVal();
857   }
858   case loc::MemRegionValKind: {
859     if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
860       // If one of the operands is a symbol and the other is a constant,
861       // build an expression for use by the constraint manager.
862       if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
863         if (BinaryOperator::isComparisonOp(op))
864           return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
865         return UnknownVal();
866       }
867       // Special case comparisons to NULL.
868       // This must come after the test if the LHS is a symbol, which is used to
869       // build constraints. The address of any non-symbolic region is guaranteed
870       // to be non-NULL.
871       if (rInt->isZeroConstant()) {
872         if (op == BO_Sub)
873           return evalCast(lhs, resultTy, QualType{});
874 
875         if (BinaryOperator::isComparisonOp(op)) {
876           QualType boolType = getContext().BoolTy;
877           NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
878           NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
879           return evalBinOpNN(state, op, l, r, resultTy);
880         }
881       }
882 
883       // Comparing a region to an arbitrary integer is completely unknowable.
884       return UnknownVal();
885     }
886 
887     // Get both values as regions, if possible.
888     const MemRegion *LeftMR = lhs.getAsRegion();
889     assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
890 
891     const MemRegion *RightMR = rhs.getAsRegion();
892     if (!RightMR)
893       // The RHS is probably a label, which in theory could address a region.
894       // FIXME: we can probably make a more useful statement about non-code
895       // regions, though.
896       return UnknownVal();
897 
898     const MemRegion *LeftBase = LeftMR->getBaseRegion();
899     const MemRegion *RightBase = RightMR->getBaseRegion();
900     const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
901     const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
902     const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
903 
904     // If the two regions are from different known memory spaces they cannot be
905     // equal. Also, assume that no symbolic region (whose memory space is
906     // unknown) is on the stack.
907     if (LeftMS != RightMS &&
908         ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
909          (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
910       switch (op) {
911       default:
912         return UnknownVal();
913       case BO_EQ:
914         return makeTruthVal(false, resultTy);
915       case BO_NE:
916         return makeTruthVal(true, resultTy);
917       }
918     }
919 
920     // If both values wrap regions, see if they're from different base regions.
921     // Note, heap base symbolic regions are assumed to not alias with
922     // each other; for example, we assume that malloc returns different address
923     // on each invocation.
924     // FIXME: ObjC object pointers always reside on the heap, but currently
925     // we treat their memory space as unknown, because symbolic pointers
926     // to ObjC objects may alias. There should be a way to construct
927     // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
928     // guesses memory space for ObjC object pointers manually instead of
929     // relying on us.
930     if (LeftBase != RightBase &&
931         ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
932          (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
933       switch (op) {
934       default:
935         return UnknownVal();
936       case BO_EQ:
937         return makeTruthVal(false, resultTy);
938       case BO_NE:
939         return makeTruthVal(true, resultTy);
940       }
941     }
942 
943     // Handle special cases for when both regions are element regions.
944     const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
945     const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
946     if (RightER && LeftER) {
947       // Next, see if the two ERs have the same super-region and matching types.
948       // FIXME: This should do something useful even if the types don't match,
949       // though if both indexes are constant the RegionRawOffset path will
950       // give the correct answer.
951       if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
952           LeftER->getElementType() == RightER->getElementType()) {
953         // Get the left index and cast it to the correct type.
954         // If the index is unknown or undefined, bail out here.
955         SVal LeftIndexVal = LeftER->getIndex();
956         Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
957         if (!LeftIndex)
958           return UnknownVal();
959         LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
960         LeftIndex = LeftIndexVal.getAs<NonLoc>();
961         if (!LeftIndex)
962           return UnknownVal();
963 
964         // Do the same for the right index.
965         SVal RightIndexVal = RightER->getIndex();
966         Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
967         if (!RightIndex)
968           return UnknownVal();
969         RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
970         RightIndex = RightIndexVal.getAs<NonLoc>();
971         if (!RightIndex)
972           return UnknownVal();
973 
974         // Actually perform the operation.
975         // evalBinOpNN expects the two indexes to already be the right type.
976         return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
977       }
978     }
979 
980     // Special handling of the FieldRegions, even with symbolic offsets.
981     const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
982     const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
983     if (RightFR && LeftFR) {
984       SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
985                                                *this);
986       if (!R.isUnknown())
987         return R;
988     }
989 
990     // Compare the regions using the raw offsets.
991     RegionOffset LeftOffset = LeftMR->getAsOffset();
992     RegionOffset RightOffset = RightMR->getAsOffset();
993 
994     if (LeftOffset.getRegion() != nullptr &&
995         LeftOffset.getRegion() == RightOffset.getRegion() &&
996         !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
997       int64_t left = LeftOffset.getOffset();
998       int64_t right = RightOffset.getOffset();
999 
1000       switch (op) {
1001         default:
1002           return UnknownVal();
1003         case BO_LT:
1004           return makeTruthVal(left < right, resultTy);
1005         case BO_GT:
1006           return makeTruthVal(left > right, resultTy);
1007         case BO_LE:
1008           return makeTruthVal(left <= right, resultTy);
1009         case BO_GE:
1010           return makeTruthVal(left >= right, resultTy);
1011         case BO_EQ:
1012           return makeTruthVal(left == right, resultTy);
1013         case BO_NE:
1014           return makeTruthVal(left != right, resultTy);
1015       }
1016     }
1017 
1018     // At this point we're not going to get a good answer, but we can try
1019     // conjuring an expression instead.
1020     SymbolRef LHSSym = lhs.getAsLocSymbol();
1021     SymbolRef RHSSym = rhs.getAsLocSymbol();
1022     if (LHSSym && RHSSym)
1023       return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1024 
1025     // If we get here, we have no way of comparing the regions.
1026     return UnknownVal();
1027   }
1028   }
1029 }
1030 
1031 SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1032                                     BinaryOperator::Opcode op, Loc lhs,
1033                                     NonLoc rhs, QualType resultTy) {
1034   if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1035     if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1036       if (PTMSV->isNullMemberPointer())
1037         return UndefinedVal();
1038 
1039       auto getFieldLValue = [&](const auto *FD) -> SVal {
1040         SVal Result = lhs;
1041 
1042         for (const auto &I : *PTMSV)
1043           Result = StateMgr.getStoreManager().evalDerivedToBase(
1044               Result, I->getType(), I->isVirtual());
1045 
1046         return state->getLValue(FD, Result);
1047       };
1048 
1049       if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1050         return getFieldLValue(FD);
1051       }
1052       if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1053         return getFieldLValue(FD);
1054       }
1055     }
1056 
1057     return rhs;
1058   }
1059 
1060   assert(!BinaryOperator::isComparisonOp(op) &&
1061          "arguments to comparison ops must be of the same type");
1062 
1063   // Special case: rhs is a zero constant.
1064   if (rhs.isZeroConstant())
1065     return lhs;
1066 
1067   // Perserve the null pointer so that it can be found by the DerefChecker.
1068   if (lhs.isZeroConstant())
1069     return lhs;
1070 
1071   // We are dealing with pointer arithmetic.
1072 
1073   // Handle pointer arithmetic on constant values.
1074   if (Optional<nonloc::ConcreteInt> rhsInt = rhs.getAs<nonloc::ConcreteInt>()) {
1075     if (Optional<loc::ConcreteInt> lhsInt = lhs.getAs<loc::ConcreteInt>()) {
1076       const llvm::APSInt &leftI = lhsInt->getValue();
1077       assert(leftI.isUnsigned());
1078       llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1079 
1080       // Convert the bitwidth of rightI.  This should deal with overflow
1081       // since we are dealing with concrete values.
1082       rightI = rightI.extOrTrunc(leftI.getBitWidth());
1083 
1084       // Offset the increment by the pointer size.
1085       llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1086       QualType pointeeType = resultTy->getPointeeType();
1087       Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1088       rightI *= Multiplicand;
1089 
1090       // Compute the adjusted pointer.
1091       switch (op) {
1092         case BO_Add:
1093           rightI = leftI + rightI;
1094           break;
1095         case BO_Sub:
1096           rightI = leftI - rightI;
1097           break;
1098         default:
1099           llvm_unreachable("Invalid pointer arithmetic operation");
1100       }
1101       return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1102     }
1103   }
1104 
1105   // Handle cases where 'lhs' is a region.
1106   if (const MemRegion *region = lhs.getAsRegion()) {
1107     rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1108     SVal index = UnknownVal();
1109     const SubRegion *superR = nullptr;
1110     // We need to know the type of the pointer in order to add an integer to it.
1111     // Depending on the type, different amount of bytes is added.
1112     QualType elementType;
1113 
1114     if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1115       assert(op == BO_Add || op == BO_Sub);
1116       index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1117                           getArrayIndexType());
1118       superR = cast<SubRegion>(elemReg->getSuperRegion());
1119       elementType = elemReg->getElementType();
1120     }
1121     else if (isa<SubRegion>(region)) {
1122       assert(op == BO_Add || op == BO_Sub);
1123       index = (op == BO_Add) ? rhs : evalMinus(rhs);
1124       superR = cast<SubRegion>(region);
1125       // TODO: Is this actually reliable? Maybe improving our MemRegion
1126       // hierarchy to provide typed regions for all non-void pointers would be
1127       // better. For instance, we cannot extend this towards LocAsInteger
1128       // operations, where result type of the expression is integer.
1129       if (resultTy->isAnyPointerType())
1130         elementType = resultTy->getPointeeType();
1131     }
1132 
1133     // Represent arithmetic on void pointers as arithmetic on char pointers.
1134     // It is fine when a TypedValueRegion of char value type represents
1135     // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1136     if (elementType->isVoidType())
1137       elementType = getContext().CharTy;
1138 
1139     if (Optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1140       return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1141                                                        superR, getContext()));
1142     }
1143   }
1144   return UnknownVal();
1145 }
1146 
1147 const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1148                                                    SVal V) {
1149   V = simplifySVal(state, V);
1150   if (V.isUnknownOrUndef())
1151     return nullptr;
1152 
1153   if (Optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1154     return &X->getValue();
1155 
1156   if (Optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1157     return &X->getValue();
1158 
1159   if (SymbolRef Sym = V.getAsSymbol())
1160     return state->getConstraintManager().getSymVal(state, Sym);
1161 
1162   return nullptr;
1163 }
1164 
1165 SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1166   SVal SimplifiedVal = simplifySValOnce(State, Val);
1167   while (SimplifiedVal != Val) {
1168     Val = SimplifiedVal;
1169     SimplifiedVal = simplifySValOnce(State, Val);
1170   }
1171   return SimplifiedVal;
1172 }
1173 
1174 SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1175   return simplifyUntilFixpoint(State, V);
1176 }
1177 
1178 SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1179   // For now, this function tries to constant-fold symbols inside a
1180   // nonloc::SymbolVal, and does nothing else. More simplifications should
1181   // be possible, such as constant-folding an index in an ElementRegion.
1182 
1183   class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1184     ProgramStateRef State;
1185     SValBuilder &SVB;
1186 
1187     // Cache results for the lifetime of the Simplifier. Results change every
1188     // time new constraints are added to the program state, which is the whole
1189     // point of simplifying, and for that very reason it's pointless to maintain
1190     // the same cache for the duration of the whole analysis.
1191     llvm::DenseMap<SymbolRef, SVal> Cached;
1192 
1193     static bool isUnchanged(SymbolRef Sym, SVal Val) {
1194       return Sym == Val.getAsSymbol();
1195     }
1196 
1197     SVal cache(SymbolRef Sym, SVal V) {
1198       Cached[Sym] = V;
1199       return V;
1200     }
1201 
1202     SVal skip(SymbolRef Sym) {
1203       return cache(Sym, SVB.makeSymbolVal(Sym));
1204     }
1205 
1206     // Return the known const value for the Sym if available, or return Undef
1207     // otherwise.
1208     SVal getConst(SymbolRef Sym) {
1209       const llvm::APSInt *Const =
1210           State->getConstraintManager().getSymVal(State, Sym);
1211       if (Const)
1212         return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1213                                               : (SVal)SVB.makeIntVal(*Const);
1214       return UndefinedVal();
1215     }
1216 
1217     SVal getConstOrVisit(SymbolRef Sym) {
1218       const SVal Ret = getConst(Sym);
1219       if (Ret.isUndef())
1220         return Visit(Sym);
1221       return Ret;
1222     }
1223 
1224   public:
1225     Simplifier(ProgramStateRef State)
1226         : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1227 
1228     SVal VisitSymbolData(const SymbolData *S) {
1229       // No cache here.
1230       if (const llvm::APSInt *I =
1231               SVB.getKnownValue(State, SVB.makeSymbolVal(S)))
1232         return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1233                                             : (SVal)SVB.makeIntVal(*I);
1234       return SVB.makeSymbolVal(S);
1235     }
1236 
1237     // TODO: Support SymbolCast.
1238 
1239     SVal VisitSymIntExpr(const SymIntExpr *S) {
1240       auto I = Cached.find(S);
1241       if (I != Cached.end())
1242         return I->second;
1243 
1244       SVal LHS = getConstOrVisit(S->getLHS());
1245       if (isUnchanged(S->getLHS(), LHS))
1246         return skip(S);
1247 
1248       SVal RHS;
1249       // By looking at the APSInt in the right-hand side of S, we cannot
1250       // figure out if it should be treated as a Loc or as a NonLoc.
1251       // So make our guess by recalling that we cannot multiply pointers
1252       // or compare a pointer to an integer.
1253       if (Loc::isLocType(S->getLHS()->getType()) &&
1254           BinaryOperator::isComparisonOp(S->getOpcode())) {
1255         // The usual conversion of $sym to &SymRegion{$sym}, as they have
1256         // the same meaning for Loc-type symbols, but the latter form
1257         // is preferred in SVal computations for being Loc itself.
1258         if (SymbolRef Sym = LHS.getAsSymbol()) {
1259           assert(Loc::isLocType(Sym->getType()));
1260           LHS = SVB.makeLoc(Sym);
1261         }
1262         RHS = SVB.makeIntLocVal(S->getRHS());
1263       } else {
1264         RHS = SVB.makeIntVal(S->getRHS());
1265       }
1266 
1267       return cache(
1268           S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1269     }
1270 
1271     SVal VisitIntSymExpr(const IntSymExpr *S) {
1272       auto I = Cached.find(S);
1273       if (I != Cached.end())
1274         return I->second;
1275 
1276       SVal RHS = getConstOrVisit(S->getRHS());
1277       if (isUnchanged(S->getRHS(), RHS))
1278         return skip(S);
1279 
1280       SVal LHS = SVB.makeIntVal(S->getLHS());
1281       return cache(
1282           S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1283     }
1284 
1285     SVal VisitSymSymExpr(const SymSymExpr *S) {
1286       auto I = Cached.find(S);
1287       if (I != Cached.end())
1288         return I->second;
1289 
1290       // For now don't try to simplify mixed Loc/NonLoc expressions
1291       // because they often appear from LocAsInteger operations
1292       // and we don't know how to combine a LocAsInteger
1293       // with a concrete value.
1294       if (Loc::isLocType(S->getLHS()->getType()) !=
1295           Loc::isLocType(S->getRHS()->getType()))
1296         return skip(S);
1297 
1298       SVal LHS = getConstOrVisit(S->getLHS());
1299       SVal RHS = getConstOrVisit(S->getRHS());
1300 
1301       if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1302         return skip(S);
1303 
1304       return cache(
1305           S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1306     }
1307 
1308     SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1309 
1310     SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1311 
1312     SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) {
1313       // Simplification is much more costly than computing complexity.
1314       // For high complexity, it may be not worth it.
1315       return Visit(V.getSymbol());
1316     }
1317 
1318     SVal VisitSVal(SVal V) { return V; }
1319   };
1320 
1321   // A crude way of preventing this function from calling itself from evalBinOp.
1322   static bool isReentering = false;
1323   if (isReentering)
1324     return V;
1325 
1326   isReentering = true;
1327   SVal SimplifiedV = Simplifier(State).Visit(V);
1328   isReentering = false;
1329 
1330   return SimplifiedV;
1331 }
1332