1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/DenseSet.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include <cassert>
26 #include <memory>
27 #include <utility>
28 
29 namespace clang {
30 namespace dataflow {
31 
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
34 // analysis.
35 static constexpr int MaxCompositeValueDepth = 3;
36 static constexpr int MaxCompositeValueSize = 1000;
37 
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 template <typename K, typename V>
40 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
41                                         const llvm::DenseMap<K, V> &Map2) {
42   llvm::DenseMap<K, V> Result;
43   for (auto &Entry : Map1) {
44     auto It = Map2.find(Entry.first);
45     if (It != Map2.end() && Entry.second == It->second)
46       Result.insert({Entry.first, Entry.second});
47   }
48   return Result;
49 }
50 
51 /// Returns true if and only if `Val1` is equivalent to `Val2`.
52 static bool equivalentValues(QualType Type, Value *Val1,
53                              const Environment &Env1, Value *Val2,
54                              const Environment &Env2,
55                              Environment::ValueModel &Model) {
56   if (Val1 == Val2)
57     return true;
58 
59   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
60     auto *IndVal2 = cast<IndirectionValue>(Val2);
61     assert(IndVal1->getKind() == IndVal2->getKind());
62     return &IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc();
63   }
64 
65   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
66 }
67 
68 /// Initializes a global storage value.
69 static void initGlobalVar(const VarDecl &D, Environment &Env) {
70   if (!D.hasGlobalStorage() ||
71       Env.getStorageLocation(D, SkipPast::None) != nullptr)
72     return;
73 
74   auto &Loc = Env.createStorageLocation(D);
75   Env.setStorageLocation(D, Loc);
76   if (auto *Val = Env.createValue(D.getType()))
77     Env.setValue(Loc, *Val);
78 }
79 
80 /// Initializes a global storage value.
81 static void initGlobalVar(const Decl &D, Environment &Env) {
82   if (auto *V = dyn_cast<VarDecl>(&D))
83     initGlobalVar(*V, Env);
84 }
85 
86 /// Initializes global storage values that are declared or referenced from
87 /// sub-statements of `S`.
88 // FIXME: Add support for resetting globals after function calls to enable
89 // the implementation of sound analyses.
90 static void initGlobalVars(const Stmt &S, Environment &Env) {
91   for (auto *Child : S.children()) {
92     if (Child != nullptr)
93       initGlobalVars(*Child, Env);
94   }
95 
96   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
97     if (DS->isSingleDecl()) {
98       initGlobalVar(*DS->getSingleDecl(), Env);
99     } else {
100       for (auto *D : DS->getDeclGroup())
101         initGlobalVar(*D, Env);
102     }
103   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
104     initGlobalVar(*E->getDecl(), Env);
105   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
106     initGlobalVar(*E->getMemberDecl(), Env);
107   }
108 }
109 
110 /// Returns constraints that represent the disjunction of `Constraints1` and
111 /// `Constraints2`.
112 ///
113 /// Requirements:
114 ///
115 ///  The elements of `Constraints1` and `Constraints2` must not be null.
116 llvm::DenseSet<BoolValue *>
117 joinConstraints(DataflowAnalysisContext *Context,
118                 const llvm::DenseSet<BoolValue *> &Constraints1,
119                 const llvm::DenseSet<BoolValue *> &Constraints2) {
120   // `(X ^ Y) v (X ^ Z)` is logically equivalent to `X ^ (Y v Z)`. Therefore, to
121   // avoid unnecessarily expanding the resulting set of constraints, we will add
122   // all common constraints of `Constraints1` and `Constraints2` directly and
123   // add a disjunction of the constraints that are not common.
124 
125   llvm::DenseSet<BoolValue *> JoinedConstraints;
126 
127   if (Constraints1.empty() || Constraints2.empty()) {
128     // Disjunction of empty set and non-empty set is represented as empty set.
129     return JoinedConstraints;
130   }
131 
132   BoolValue *Val1 = nullptr;
133   for (BoolValue *Constraint : Constraints1) {
134     if (Constraints2.contains(Constraint)) {
135       // Add common constraints directly to `JoinedConstraints`.
136       JoinedConstraints.insert(Constraint);
137     } else if (Val1 == nullptr) {
138       Val1 = Constraint;
139     } else {
140       Val1 = &Context->getOrCreateConjunctionValue(*Val1, *Constraint);
141     }
142   }
143 
144   BoolValue *Val2 = nullptr;
145   for (BoolValue *Constraint : Constraints2) {
146     // Common constraints are added to `JoinedConstraints` above.
147     if (Constraints1.contains(Constraint)) {
148       continue;
149     }
150     if (Val2 == nullptr) {
151       Val2 = Constraint;
152     } else {
153       Val2 = &Context->getOrCreateConjunctionValue(*Val2, *Constraint);
154     }
155   }
156 
157   // An empty set of constraints (represented as a null value) is interpreted as
158   // `true` and `true v X` is logically equivalent to `true` so we need to add a
159   // constraint only if both `Val1` and `Val2` are not null.
160   if (Val1 != nullptr && Val2 != nullptr)
161     JoinedConstraints.insert(
162         &Context->getOrCreateDisjunctionValue(*Val1, *Val2));
163 
164   return JoinedConstraints;
165 }
166 
167 Environment::Environment(DataflowAnalysisContext &DACtx,
168                          const DeclContext &DeclCtx)
169     : Environment(DACtx) {
170   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
171     assert(FuncDecl->getBody() != nullptr);
172     initGlobalVars(*FuncDecl->getBody(), *this);
173     for (const auto *ParamDecl : FuncDecl->parameters()) {
174       assert(ParamDecl != nullptr);
175       auto &ParamLoc = createStorageLocation(*ParamDecl);
176       setStorageLocation(*ParamDecl, ParamLoc);
177       if (Value *ParamVal = createValue(ParamDecl->getType()))
178         setValue(ParamLoc, *ParamVal);
179     }
180   }
181 
182   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
183     if (!MethodDecl->isStatic()) {
184       QualType ThisPointeeType = MethodDecl->getThisObjectType();
185       // FIXME: Add support for union types.
186       if (!ThisPointeeType->isUnionType()) {
187         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
188         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
189         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
190           setValue(ThisPointeeLoc, *ThisPointeeVal);
191       }
192     }
193   }
194 }
195 
196 bool Environment::equivalentTo(const Environment &Other,
197                                Environment::ValueModel &Model) const {
198   assert(DACtx == Other.DACtx);
199 
200   if (DeclToLoc != Other.DeclToLoc)
201     return false;
202 
203   if (ExprToLoc != Other.ExprToLoc)
204     return false;
205 
206   if (MemberLocToStruct != Other.MemberLocToStruct)
207     return false;
208 
209   if (LocToVal.size() != Other.LocToVal.size())
210     return false;
211 
212   for (auto &Entry : LocToVal) {
213     const StorageLocation *Loc = Entry.first;
214     assert(Loc != nullptr);
215 
216     Value *Val = Entry.second;
217     assert(Val != nullptr);
218 
219     auto It = Other.LocToVal.find(Loc);
220     if (It == Other.LocToVal.end())
221       return false;
222     assert(It->second != nullptr);
223 
224     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
225       return false;
226   }
227 
228   return true;
229 }
230 
231 LatticeJoinEffect Environment::join(const Environment &Other,
232                                     Environment::ValueModel &Model) {
233   assert(DACtx == Other.DACtx);
234 
235   auto Effect = LatticeJoinEffect::Unchanged;
236 
237   const unsigned DeclToLocSizeBefore = DeclToLoc.size();
238   DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
239   if (DeclToLocSizeBefore != DeclToLoc.size())
240     Effect = LatticeJoinEffect::Changed;
241 
242   const unsigned ExprToLocSizeBefore = ExprToLoc.size();
243   ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
244   if (ExprToLocSizeBefore != ExprToLoc.size())
245     Effect = LatticeJoinEffect::Changed;
246 
247   const unsigned MemberLocToStructSizeBefore = MemberLocToStruct.size();
248   MemberLocToStruct =
249       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
250   if (MemberLocToStructSizeBefore != MemberLocToStruct.size())
251     Effect = LatticeJoinEffect::Changed;
252 
253   // Move `LocToVal` so that `Environment::ValueModel::merge` can safely assign
254   // values to storage locations while this code iterates over the current
255   // assignments.
256   llvm::DenseMap<const StorageLocation *, Value *> OldLocToVal =
257       std::move(LocToVal);
258   for (auto &Entry : OldLocToVal) {
259     const StorageLocation *Loc = Entry.first;
260     assert(Loc != nullptr);
261 
262     Value *Val = Entry.second;
263     assert(Val != nullptr);
264 
265     auto It = Other.LocToVal.find(Loc);
266     if (It == Other.LocToVal.end())
267       continue;
268     assert(It->second != nullptr);
269 
270     if (equivalentValues(Loc->getType(), Val, *this, It->second, Other,
271                          Model)) {
272       LocToVal.insert({Loc, Val});
273       continue;
274     }
275 
276     // FIXME: Consider destroying `MergedValue` immediately if
277     // `ValueModel::merge` returns false to avoid storing unneeded values in
278     // `DACtx`.
279     if (Value *MergedVal = createValue(Loc->getType()))
280       if (Model.merge(Loc->getType(), *Val, *this, *It->second, Other,
281                       *MergedVal, *this))
282         LocToVal.insert({Loc, MergedVal});
283   }
284   if (OldLocToVal.size() != LocToVal.size())
285     Effect = LatticeJoinEffect::Changed;
286 
287   FlowConditionConstraints = joinConstraints(DACtx, FlowConditionConstraints,
288                                              Other.FlowConditionConstraints);
289 
290   return Effect;
291 }
292 
293 StorageLocation &Environment::createStorageLocation(QualType Type) {
294   assert(!Type.isNull());
295   if (Type->isStructureOrClassType() || Type->isUnionType()) {
296     // FIXME: Explore options to avoid eager initialization of fields as some of
297     // them might not be needed for a particular analysis.
298     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
299     for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
300       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
301     }
302     return takeOwnership(
303         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
304   }
305   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
306 }
307 
308 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
309   // Evaluated declarations are always assigned the same storage locations to
310   // ensure that the environment stabilizes across loop iterations. Storage
311   // locations for evaluated declarations are stored in the analysis context.
312   if (auto *Loc = DACtx->getStorageLocation(D))
313     return *Loc;
314   auto &Loc = createStorageLocation(D.getType());
315   DACtx->setStorageLocation(D, Loc);
316   return Loc;
317 }
318 
319 StorageLocation &Environment::createStorageLocation(const Expr &E) {
320   // Evaluated expressions are always assigned the same storage locations to
321   // ensure that the environment stabilizes across loop iterations. Storage
322   // locations for evaluated expressions are stored in the analysis context.
323   if (auto *Loc = DACtx->getStorageLocation(E))
324     return *Loc;
325   auto &Loc = createStorageLocation(E.getType());
326   DACtx->setStorageLocation(E, Loc);
327   return Loc;
328 }
329 
330 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
331   assert(DeclToLoc.find(&D) == DeclToLoc.end());
332   DeclToLoc[&D] = &Loc;
333 }
334 
335 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
336                                                  SkipPast SP) const {
337   auto It = DeclToLoc.find(&D);
338   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
339 }
340 
341 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
342   assert(ExprToLoc.find(&E) == ExprToLoc.end());
343   ExprToLoc[&E] = &Loc;
344 }
345 
346 StorageLocation *Environment::getStorageLocation(const Expr &E,
347                                                  SkipPast SP) const {
348   auto It = ExprToLoc.find(&E);
349   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
350 }
351 
352 StorageLocation *Environment::getThisPointeeStorageLocation() const {
353   return DACtx->getThisPointeeStorageLocation();
354 }
355 
356 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
357   LocToVal[&Loc] = &Val;
358 
359   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
360     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
361 
362     const QualType Type = AggregateLoc.getType();
363     assert(Type->isStructureOrClassType());
364 
365     for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
366       assert(Field != nullptr);
367       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
368       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
369       setValue(FieldLoc, StructVal->getChild(*Field));
370     }
371   }
372 
373   auto IT = MemberLocToStruct.find(&Loc);
374   if (IT != MemberLocToStruct.end()) {
375     // `Loc` is the location of a struct member so we need to also update the
376     // value of the member in the corresponding `StructValue`.
377 
378     assert(IT->second.first != nullptr);
379     StructValue &StructVal = *IT->second.first;
380 
381     assert(IT->second.second != nullptr);
382     const ValueDecl &Member = *IT->second.second;
383 
384     StructVal.setChild(Member, Val);
385   }
386 }
387 
388 Value *Environment::getValue(const StorageLocation &Loc) const {
389   auto It = LocToVal.find(&Loc);
390   return It == LocToVal.end() ? nullptr : It->second;
391 }
392 
393 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
394   auto *Loc = getStorageLocation(D, SP);
395   if (Loc == nullptr)
396     return nullptr;
397   return getValue(*Loc);
398 }
399 
400 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
401   auto *Loc = getStorageLocation(E, SP);
402   if (Loc == nullptr)
403     return nullptr;
404   return getValue(*Loc);
405 }
406 
407 Value *Environment::createValue(QualType Type) {
408   llvm::DenseSet<QualType> Visited;
409   int CreatedValuesCount = 0;
410   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
411                                                 CreatedValuesCount);
412   if (CreatedValuesCount > MaxCompositeValueSize) {
413     llvm::errs() << "Attempting to initialize a huge value of type: "
414                  << Type.getAsString() << "\n";
415   }
416   return Val;
417 }
418 
419 Value *Environment::createValueUnlessSelfReferential(
420     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
421     int &CreatedValuesCount) {
422   assert(!Type.isNull());
423 
424   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
425   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
426       Depth > MaxCompositeValueDepth)
427     return nullptr;
428 
429   if (Type->isBooleanType()) {
430     CreatedValuesCount++;
431     return &makeAtomicBoolValue();
432   }
433 
434   if (Type->isIntegerType()) {
435     CreatedValuesCount++;
436     return &takeOwnership(std::make_unique<IntegerValue>());
437   }
438 
439   if (Type->isReferenceType()) {
440     CreatedValuesCount++;
441     QualType PointeeType = Type->getAs<ReferenceType>()->getPointeeType();
442     auto &PointeeLoc = createStorageLocation(PointeeType);
443 
444     if (!Visited.contains(PointeeType.getCanonicalType())) {
445       Visited.insert(PointeeType.getCanonicalType());
446       Value *PointeeVal = createValueUnlessSelfReferential(
447           PointeeType, Visited, Depth, CreatedValuesCount);
448       Visited.erase(PointeeType.getCanonicalType());
449 
450       if (PointeeVal != nullptr)
451         setValue(PointeeLoc, *PointeeVal);
452     }
453 
454     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
455   }
456 
457   if (Type->isPointerType()) {
458     CreatedValuesCount++;
459     QualType PointeeType = Type->getAs<PointerType>()->getPointeeType();
460     auto &PointeeLoc = createStorageLocation(PointeeType);
461 
462     if (!Visited.contains(PointeeType.getCanonicalType())) {
463       Visited.insert(PointeeType.getCanonicalType());
464       Value *PointeeVal = createValueUnlessSelfReferential(
465           PointeeType, Visited, Depth, CreatedValuesCount);
466       Visited.erase(PointeeType.getCanonicalType());
467 
468       if (PointeeVal != nullptr)
469         setValue(PointeeLoc, *PointeeVal);
470     }
471 
472     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
473   }
474 
475   if (Type->isStructureOrClassType()) {
476     CreatedValuesCount++;
477     // FIXME: Initialize only fields that are accessed in the context that is
478     // being analyzed.
479     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
480     for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
481       assert(Field != nullptr);
482 
483       QualType FieldType = Field->getType();
484       if (Visited.contains(FieldType.getCanonicalType()))
485         continue;
486 
487       Visited.insert(FieldType.getCanonicalType());
488       FieldValues.insert(
489           {Field, createValueUnlessSelfReferential(
490                       FieldType, Visited, Depth + 1, CreatedValuesCount)});
491       Visited.erase(FieldType.getCanonicalType());
492     }
493 
494     return &takeOwnership(
495         std::make_unique<StructValue>(std::move(FieldValues)));
496   }
497 
498   return nullptr;
499 }
500 
501 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
502   switch (SP) {
503   case SkipPast::None:
504     return Loc;
505   case SkipPast::Reference:
506     // References cannot be chained so we only need to skip past one level of
507     // indirection.
508     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
509       return Val->getPointeeLoc();
510     return Loc;
511   case SkipPast::ReferenceThenPointer:
512     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
513     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
514       return Val->getPointeeLoc();
515     return LocPastRef;
516   }
517   llvm_unreachable("bad SkipPast kind");
518 }
519 
520 const StorageLocation &Environment::skip(const StorageLocation &Loc,
521                                          SkipPast SP) const {
522   return skip(*const_cast<StorageLocation *>(&Loc), SP);
523 }
524 
525 void Environment::addToFlowCondition(BoolValue &Val) {
526   FlowConditionConstraints.insert(&Val);
527 }
528 
529 bool Environment::flowConditionImplies(BoolValue &Val) const {
530   // Returns true if and only if truth assignment of the flow condition implies
531   // that `Val` is also true. We prove whether or not this property holds by
532   // reducing the problem to satisfiability checking. In other words, we attempt
533   // to show that assuming `Val` is false makes the constraints induced by the
534   // flow condition unsatisfiable.
535   llvm::DenseSet<BoolValue *> Constraints = {
536       &makeNot(Val), &getBoolLiteralValue(true),
537       &makeNot(getBoolLiteralValue(false))};
538   Constraints.insert(FlowConditionConstraints.begin(),
539                      FlowConditionConstraints.end());
540   return DACtx->getSolver().solve(std::move(Constraints)) ==
541          Solver::Result::Unsatisfiable;
542 }
543 
544 } // namespace dataflow
545 } // namespace clang
546