1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/DenseSet.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include <cassert>
26 #include <memory>
27 #include <utility>
28 
29 namespace clang {
30 namespace dataflow {
31 
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
34 // analysis.
35 static constexpr int MaxCompositeValueDepth = 3;
36 static constexpr int MaxCompositeValueSize = 1000;
37 
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 template <typename K, typename V>
40 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
41                                         const llvm::DenseMap<K, V> &Map2) {
42   llvm::DenseMap<K, V> Result;
43   for (auto &Entry : Map1) {
44     auto It = Map2.find(Entry.first);
45     if (It != Map2.end() && Entry.second == It->second)
46       Result.insert({Entry.first, Entry.second});
47   }
48   return Result;
49 }
50 
51 /// Returns true if and only if `Val1` is equivalent to `Val2`.
52 static bool equivalentValues(QualType Type, Value *Val1,
53                              const Environment &Env1, Value *Val2,
54                              const Environment &Env2,
55                              Environment::ValueModel &Model) {
56   if (Val1 == Val2)
57     return true;
58 
59   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
60     auto *IndVal2 = cast<IndirectionValue>(Val2);
61     assert(IndVal1->getKind() == IndVal2->getKind());
62     return &IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc();
63   }
64 
65   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
66 }
67 
68 /// Initializes a global storage value.
69 static void initGlobalVar(const VarDecl &D, Environment &Env) {
70   if (!D.hasGlobalStorage() ||
71       Env.getStorageLocation(D, SkipPast::None) != nullptr)
72     return;
73 
74   auto &Loc = Env.createStorageLocation(D);
75   Env.setStorageLocation(D, Loc);
76   if (auto *Val = Env.createValue(D.getType()))
77     Env.setValue(Loc, *Val);
78 }
79 
80 /// Initializes a global storage value.
81 static void initGlobalVar(const Decl &D, Environment &Env) {
82   if (auto *V = dyn_cast<VarDecl>(&D))
83     initGlobalVar(*V, Env);
84 }
85 
86 /// Initializes global storage values that are declared or referenced from
87 /// sub-statements of `S`.
88 // FIXME: Add support for resetting globals after function calls to enable
89 // the implementation of sound analyses.
90 static void initGlobalVars(const Stmt &S, Environment &Env) {
91   for (auto *Child : S.children()) {
92     if (Child != nullptr)
93       initGlobalVars(*Child, Env);
94   }
95 
96   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
97     if (DS->isSingleDecl()) {
98       initGlobalVar(*DS->getSingleDecl(), Env);
99     } else {
100       for (auto *D : DS->getDeclGroup())
101         initGlobalVar(*D, Env);
102     }
103   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
104     initGlobalVar(*E->getDecl(), Env);
105   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
106     initGlobalVar(*E->getMemberDecl(), Env);
107   }
108 }
109 
110 /// Returns constraints that represent the disjunction of `Constraints1` and
111 /// `Constraints2`.
112 ///
113 /// Requirements:
114 ///
115 ///  The elements of `Constraints1` and `Constraints2` must not be null.
116 llvm::DenseSet<BoolValue *>
117 joinConstraints(DataflowAnalysisContext *Context,
118                 const llvm::DenseSet<BoolValue *> &Constraints1,
119                 const llvm::DenseSet<BoolValue *> &Constraints2) {
120   // `(X ^ Y) v (X ^ Z)` is logically equivalent to `X ^ (Y v Z)`. Therefore, to
121   // avoid unnecessarily expanding the resulting set of constraints, we will add
122   // all common constraints of `Constraints1` and `Constraints2` directly and
123   // add a disjunction of the constraints that are not common.
124 
125   llvm::DenseSet<BoolValue *> JoinedConstraints;
126 
127   if (Constraints1.empty() || Constraints2.empty()) {
128     // Disjunction of empty set and non-empty set is represented as empty set.
129     return JoinedConstraints;
130   }
131 
132   BoolValue *Val1 = nullptr;
133   for (BoolValue *Constraint : Constraints1) {
134     if (Constraints2.contains(Constraint)) {
135       // Add common constraints directly to `JoinedConstraints`.
136       JoinedConstraints.insert(Constraint);
137     } else if (Val1 == nullptr) {
138       Val1 = Constraint;
139     } else {
140       Val1 = &Context->getOrCreateConjunctionValue(*Val1, *Constraint);
141     }
142   }
143 
144   BoolValue *Val2 = nullptr;
145   for (BoolValue *Constraint : Constraints2) {
146     // Common constraints are added to `JoinedConstraints` above.
147     if (Constraints1.contains(Constraint)) {
148       continue;
149     }
150     if (Val2 == nullptr) {
151       Val2 = Constraint;
152     } else {
153       Val2 = &Context->getOrCreateConjunctionValue(*Val2, *Constraint);
154     }
155   }
156 
157   // An empty set of constraints (represented as a null value) is interpreted as
158   // `true` and `true v X` is logically equivalent to `true` so we need to add a
159   // constraint only if both `Val1` and `Val2` are not null.
160   if (Val1 != nullptr && Val2 != nullptr)
161     JoinedConstraints.insert(
162         &Context->getOrCreateDisjunctionValue(*Val1, *Val2));
163 
164   return JoinedConstraints;
165 }
166 
167 static void
168 getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields,
169                             llvm::DenseSet<const FieldDecl *> &Fields) {
170   if (Type->isIncompleteType() || Type->isDependentType() ||
171       !Type->isRecordType())
172     return;
173 
174   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
175     if (IgnorePrivateFields &&
176         (Field->getAccess() == AS_private ||
177          (Field->getAccess() == AS_none && Type->getAsRecordDecl()->isClass())))
178       continue;
179     Fields.insert(Field);
180   }
181   if (auto *CXXRecord = Type->getAsCXXRecordDecl()) {
182     for (const CXXBaseSpecifier &Base : CXXRecord->bases()) {
183       // Ignore private fields (including default access in C++ classes) in
184       // base classes, because they are not visible in derived classes.
185       getFieldsFromClassHierarchy(Base.getType(), /*IgnorePrivateFields=*/true,
186                                   Fields);
187     }
188   }
189 }
190 
191 /// Gets the set of all fields accesible from the type.
192 ///
193 /// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
194 /// field decl will be modeled for all instances of the inherited field.
195 static llvm::DenseSet<const FieldDecl *>
196 getAccessibleObjectFields(QualType Type) {
197   llvm::DenseSet<const FieldDecl *> Fields;
198   // Don't ignore private fields for the class itself, only its super classes.
199   getFieldsFromClassHierarchy(Type, /*IgnorePrivateFields=*/false, Fields);
200   return Fields;
201 }
202 
203 Environment::Environment(DataflowAnalysisContext &DACtx,
204                          const DeclContext &DeclCtx)
205     : Environment(DACtx) {
206   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
207     assert(FuncDecl->getBody() != nullptr);
208     initGlobalVars(*FuncDecl->getBody(), *this);
209     for (const auto *ParamDecl : FuncDecl->parameters()) {
210       assert(ParamDecl != nullptr);
211       auto &ParamLoc = createStorageLocation(*ParamDecl);
212       setStorageLocation(*ParamDecl, ParamLoc);
213       if (Value *ParamVal = createValue(ParamDecl->getType()))
214         setValue(ParamLoc, *ParamVal);
215     }
216   }
217 
218   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
219     if (!MethodDecl->isStatic()) {
220       QualType ThisPointeeType = MethodDecl->getThisObjectType();
221       // FIXME: Add support for union types.
222       if (!ThisPointeeType->isUnionType()) {
223         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
224         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
225         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
226           setValue(ThisPointeeLoc, *ThisPointeeVal);
227       }
228     }
229   }
230 }
231 
232 bool Environment::equivalentTo(const Environment &Other,
233                                Environment::ValueModel &Model) const {
234   assert(DACtx == Other.DACtx);
235 
236   if (DeclToLoc != Other.DeclToLoc)
237     return false;
238 
239   if (ExprToLoc != Other.ExprToLoc)
240     return false;
241 
242   if (MemberLocToStruct != Other.MemberLocToStruct)
243     return false;
244 
245   if (LocToVal.size() != Other.LocToVal.size())
246     return false;
247 
248   for (auto &Entry : LocToVal) {
249     const StorageLocation *Loc = Entry.first;
250     assert(Loc != nullptr);
251 
252     Value *Val = Entry.second;
253     assert(Val != nullptr);
254 
255     auto It = Other.LocToVal.find(Loc);
256     if (It == Other.LocToVal.end())
257       return false;
258     assert(It->second != nullptr);
259 
260     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
261       return false;
262   }
263 
264   return true;
265 }
266 
267 LatticeJoinEffect Environment::join(const Environment &Other,
268                                     Environment::ValueModel &Model) {
269   assert(DACtx == Other.DACtx);
270 
271   auto Effect = LatticeJoinEffect::Unchanged;
272 
273   const unsigned DeclToLocSizeBefore = DeclToLoc.size();
274   DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
275   if (DeclToLocSizeBefore != DeclToLoc.size())
276     Effect = LatticeJoinEffect::Changed;
277 
278   const unsigned ExprToLocSizeBefore = ExprToLoc.size();
279   ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
280   if (ExprToLocSizeBefore != ExprToLoc.size())
281     Effect = LatticeJoinEffect::Changed;
282 
283   const unsigned MemberLocToStructSizeBefore = MemberLocToStruct.size();
284   MemberLocToStruct =
285       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
286   if (MemberLocToStructSizeBefore != MemberLocToStruct.size())
287     Effect = LatticeJoinEffect::Changed;
288 
289   // Move `LocToVal` so that `Environment::ValueModel::merge` can safely assign
290   // values to storage locations while this code iterates over the current
291   // assignments.
292   llvm::DenseMap<const StorageLocation *, Value *> OldLocToVal =
293       std::move(LocToVal);
294   for (auto &Entry : OldLocToVal) {
295     const StorageLocation *Loc = Entry.first;
296     assert(Loc != nullptr);
297 
298     Value *Val = Entry.second;
299     assert(Val != nullptr);
300 
301     auto It = Other.LocToVal.find(Loc);
302     if (It == Other.LocToVal.end())
303       continue;
304     assert(It->second != nullptr);
305 
306     if (equivalentValues(Loc->getType(), Val, *this, It->second, Other,
307                          Model)) {
308       LocToVal.insert({Loc, Val});
309       continue;
310     }
311 
312     // FIXME: Consider destroying `MergedValue` immediately if
313     // `ValueModel::merge` returns false to avoid storing unneeded values in
314     // `DACtx`.
315     if (Value *MergedVal = createValue(Loc->getType()))
316       if (Model.merge(Loc->getType(), *Val, *this, *It->second, Other,
317                       *MergedVal, *this))
318         LocToVal.insert({Loc, MergedVal});
319   }
320   if (OldLocToVal.size() != LocToVal.size())
321     Effect = LatticeJoinEffect::Changed;
322 
323   FlowConditionConstraints = joinConstraints(DACtx, FlowConditionConstraints,
324                                              Other.FlowConditionConstraints);
325 
326   return Effect;
327 }
328 
329 StorageLocation &Environment::createStorageLocation(QualType Type) {
330   assert(!Type.isNull());
331   if (Type->isStructureOrClassType() || Type->isUnionType()) {
332     // FIXME: Explore options to avoid eager initialization of fields as some of
333     // them might not be needed for a particular analysis.
334     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
335     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
336       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
337     }
338     return takeOwnership(
339         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
340   }
341   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
342 }
343 
344 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
345   // Evaluated declarations are always assigned the same storage locations to
346   // ensure that the environment stabilizes across loop iterations. Storage
347   // locations for evaluated declarations are stored in the analysis context.
348   if (auto *Loc = DACtx->getStorageLocation(D))
349     return *Loc;
350   auto &Loc = createStorageLocation(D.getType());
351   DACtx->setStorageLocation(D, Loc);
352   return Loc;
353 }
354 
355 StorageLocation &Environment::createStorageLocation(const Expr &E) {
356   // Evaluated expressions are always assigned the same storage locations to
357   // ensure that the environment stabilizes across loop iterations. Storage
358   // locations for evaluated expressions are stored in the analysis context.
359   if (auto *Loc = DACtx->getStorageLocation(E))
360     return *Loc;
361   auto &Loc = createStorageLocation(E.getType());
362   DACtx->setStorageLocation(E, Loc);
363   return Loc;
364 }
365 
366 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
367   assert(DeclToLoc.find(&D) == DeclToLoc.end());
368   DeclToLoc[&D] = &Loc;
369 }
370 
371 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
372                                                  SkipPast SP) const {
373   auto It = DeclToLoc.find(&D);
374   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
375 }
376 
377 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
378   assert(ExprToLoc.find(&E) == ExprToLoc.end());
379   ExprToLoc[&E] = &Loc;
380 }
381 
382 StorageLocation *Environment::getStorageLocation(const Expr &E,
383                                                  SkipPast SP) const {
384   // FIXME: Add a test with parens.
385   auto It = ExprToLoc.find(E.IgnoreParens());
386   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
387 }
388 
389 StorageLocation *Environment::getThisPointeeStorageLocation() const {
390   return DACtx->getThisPointeeStorageLocation();
391 }
392 
393 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
394   LocToVal[&Loc] = &Val;
395 
396   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
397     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
398 
399     const QualType Type = AggregateLoc.getType();
400     assert(Type->isStructureOrClassType());
401 
402     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
403       assert(Field != nullptr);
404       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
405       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
406       if (auto *FieldVal = StructVal->getChild(*Field))
407         setValue(FieldLoc, *FieldVal);
408     }
409   }
410 
411   auto IT = MemberLocToStruct.find(&Loc);
412   if (IT != MemberLocToStruct.end()) {
413     // `Loc` is the location of a struct member so we need to also update the
414     // value of the member in the corresponding `StructValue`.
415 
416     assert(IT->second.first != nullptr);
417     StructValue &StructVal = *IT->second.first;
418 
419     assert(IT->second.second != nullptr);
420     const ValueDecl &Member = *IT->second.second;
421 
422     StructVal.setChild(Member, Val);
423   }
424 }
425 
426 Value *Environment::getValue(const StorageLocation &Loc) const {
427   auto It = LocToVal.find(&Loc);
428   return It == LocToVal.end() ? nullptr : It->second;
429 }
430 
431 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
432   auto *Loc = getStorageLocation(D, SP);
433   if (Loc == nullptr)
434     return nullptr;
435   return getValue(*Loc);
436 }
437 
438 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
439   auto *Loc = getStorageLocation(E, SP);
440   if (Loc == nullptr)
441     return nullptr;
442   return getValue(*Loc);
443 }
444 
445 Value *Environment::createValue(QualType Type) {
446   llvm::DenseSet<QualType> Visited;
447   int CreatedValuesCount = 0;
448   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
449                                                 CreatedValuesCount);
450   if (CreatedValuesCount > MaxCompositeValueSize) {
451     llvm::errs() << "Attempting to initialize a huge value of type: "
452                  << Type.getAsString() << "\n";
453   }
454   return Val;
455 }
456 
457 Value *Environment::createValueUnlessSelfReferential(
458     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
459     int &CreatedValuesCount) {
460   assert(!Type.isNull());
461 
462   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
463   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
464       Depth > MaxCompositeValueDepth)
465     return nullptr;
466 
467   if (Type->isBooleanType()) {
468     CreatedValuesCount++;
469     return &makeAtomicBoolValue();
470   }
471 
472   if (Type->isIntegerType()) {
473     CreatedValuesCount++;
474     return &takeOwnership(std::make_unique<IntegerValue>());
475   }
476 
477   if (Type->isReferenceType()) {
478     CreatedValuesCount++;
479     QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType();
480     auto &PointeeLoc = createStorageLocation(PointeeType);
481 
482     if (!Visited.contains(PointeeType.getCanonicalType())) {
483       Visited.insert(PointeeType.getCanonicalType());
484       Value *PointeeVal = createValueUnlessSelfReferential(
485           PointeeType, Visited, Depth, CreatedValuesCount);
486       Visited.erase(PointeeType.getCanonicalType());
487 
488       if (PointeeVal != nullptr)
489         setValue(PointeeLoc, *PointeeVal);
490     }
491 
492     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
493   }
494 
495   if (Type->isPointerType()) {
496     CreatedValuesCount++;
497     QualType PointeeType = Type->castAs<PointerType>()->getPointeeType();
498     auto &PointeeLoc = createStorageLocation(PointeeType);
499 
500     if (!Visited.contains(PointeeType.getCanonicalType())) {
501       Visited.insert(PointeeType.getCanonicalType());
502       Value *PointeeVal = createValueUnlessSelfReferential(
503           PointeeType, Visited, Depth, CreatedValuesCount);
504       Visited.erase(PointeeType.getCanonicalType());
505 
506       if (PointeeVal != nullptr)
507         setValue(PointeeLoc, *PointeeVal);
508     }
509 
510     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
511   }
512 
513   if (Type->isStructureOrClassType()) {
514     CreatedValuesCount++;
515     // FIXME: Initialize only fields that are accessed in the context that is
516     // being analyzed.
517     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
518     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
519       assert(Field != nullptr);
520 
521       QualType FieldType = Field->getType();
522       if (Visited.contains(FieldType.getCanonicalType()))
523         continue;
524 
525       Visited.insert(FieldType.getCanonicalType());
526       if (auto *FieldValue = createValueUnlessSelfReferential(
527               FieldType, Visited, Depth + 1, CreatedValuesCount))
528         FieldValues.insert({Field, FieldValue});
529       Visited.erase(FieldType.getCanonicalType());
530     }
531 
532     return &takeOwnership(
533         std::make_unique<StructValue>(std::move(FieldValues)));
534   }
535 
536   return nullptr;
537 }
538 
539 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
540   switch (SP) {
541   case SkipPast::None:
542     return Loc;
543   case SkipPast::Reference:
544     // References cannot be chained so we only need to skip past one level of
545     // indirection.
546     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
547       return Val->getPointeeLoc();
548     return Loc;
549   case SkipPast::ReferenceThenPointer:
550     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
551     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
552       return Val->getPointeeLoc();
553     return LocPastRef;
554   }
555   llvm_unreachable("bad SkipPast kind");
556 }
557 
558 const StorageLocation &Environment::skip(const StorageLocation &Loc,
559                                          SkipPast SP) const {
560   return skip(*const_cast<StorageLocation *>(&Loc), SP);
561 }
562 
563 void Environment::addToFlowCondition(BoolValue &Val) {
564   FlowConditionConstraints.insert(&Val);
565 }
566 
567 bool Environment::flowConditionImplies(BoolValue &Val) const {
568   // Returns true if and only if truth assignment of the flow condition implies
569   // that `Val` is also true. We prove whether or not this property holds by
570   // reducing the problem to satisfiability checking. In other words, we attempt
571   // to show that assuming `Val` is false makes the constraints induced by the
572   // flow condition unsatisfiable.
573   llvm::DenseSet<BoolValue *> Constraints = {
574       &makeNot(Val), &getBoolLiteralValue(true),
575       &makeNot(getBoolLiteralValue(false))};
576   Constraints.insert(FlowConditionConstraints.begin(),
577                      FlowConditionConstraints.end());
578   return DACtx->getSolver().solve(std::move(Constraints)) ==
579          Solver::Result::Unsatisfiable;
580 }
581 
582 } // namespace dataflow
583 } // namespace clang
584