1 //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the TypeBasedAliasAnalysis pass, which implements
11 // metadata-based TBAA.
12 //
13 // In LLVM IR, memory does not have types, so LLVM's own type system is not
14 // suitable for doing TBAA. Instead, metadata is added to the IR to describe
15 // a type system of a higher level language. This can be used to implement
16 // typical C/C++ TBAA, but it can also be used to implement custom alias
17 // analysis behavior for other languages.
18 //
19 // We now support two types of metadata format: scalar TBAA and struct-path
20 // aware TBAA. After all testing cases are upgraded to use struct-path aware
21 // TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
22 // can be dropped.
23 //
24 // The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
25 // three fields, e.g.:
26 //   !0 = !{ !"an example type tree" }
27 //   !1 = !{ !"int", !0 }
28 //   !2 = !{ !"float", !0 }
29 //   !3 = !{ !"const float", !2, i64 1 }
30 //
31 // The first field is an identity field. It can be any value, usually
32 // an MDString, which uniquely identifies the type. The most important
33 // name in the tree is the name of the root node. Two trees with
34 // different root node names are entirely disjoint, even if they
35 // have leaves with common names.
36 //
37 // The second field identifies the type's parent node in the tree, or
38 // is null or omitted for a root node. A type is considered to alias
39 // all of its descendants and all of its ancestors in the tree. Also,
40 // a type is considered to alias all types in other trees, so that
41 // bitcode produced from multiple front-ends is handled conservatively.
42 //
43 // If the third field is present, it's an integer which if equal to 1
44 // indicates that the type is "constant" (meaning pointsToConstantMemory
45 // should return true; see
46 // http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
47 //
48 // With struct-path aware TBAA, the MDNodes attached to an instruction using
49 // "!tbaa" are called path tag nodes.
50 //
51 // The path tag node has 4 fields with the last field being optional.
52 //
53 // The first field is the base type node, it can be a struct type node
54 // or a scalar type node. The second field is the access type node, it
55 // must be a scalar type node. The third field is the offset into the base type.
56 // The last field has the same meaning as the last field of our scalar TBAA:
57 // it's an integer which if equal to 1 indicates that the access is "constant".
58 //
59 // The struct type node has a name and a list of pairs, one pair for each member
60 // of the struct. The first element of each pair is a type node (a struct type
61 // node or a scalar type node), specifying the type of the member, the second
62 // element of each pair is the offset of the member.
63 //
64 // Given an example
65 // typedef struct {
66 //   short s;
67 // } A;
68 // typedef struct {
69 //   uint16_t s;
70 //   A a;
71 // } B;
72 //
73 // For an access to B.a.s, we attach !5 (a path tag node) to the load/store
74 // instruction. The base type is !4 (struct B), the access type is !2 (scalar
75 // type short) and the offset is 4.
76 //
77 // !0 = !{!"Simple C/C++ TBAA"}
78 // !1 = !{!"omnipotent char", !0} // Scalar type node
79 // !2 = !{!"short", !1}           // Scalar type node
80 // !3 = !{!"A", !2, i64 0}        // Struct type node
81 // !4 = !{!"B", !2, i64 0, !3, i64 4}
82 //                                                           // Struct type node
83 // !5 = !{!4, !2, i64 4}          // Path tag node
84 //
85 // The struct type nodes and the scalar type nodes form a type DAG.
86 //         Root (!0)
87 //         char (!1)  -- edge to Root
88 //         short (!2) -- edge to char
89 //         A (!3) -- edge with offset 0 to short
90 //         B (!4) -- edge with offset 0 to short and edge with offset 4 to A
91 //
92 // To check if two tags (tagX and tagY) can alias, we start from the base type
93 // of tagX, follow the edge with the correct offset in the type DAG and adjust
94 // the offset until we reach the base type of tagY or until we reach the Root
95 // node.
96 // If we reach the base type of tagY, compare the adjusted offset with
97 // offset of tagY, return Alias if the offsets are the same, return NoAlias
98 // otherwise.
99 // If we reach the Root node, perform the above starting from base type of tagY
100 // to see if we reach base type of tagX.
101 //
102 // If they have different roots, they're part of different potentially
103 // unrelated type systems, so we return Alias to be conservative.
104 // If neither node is an ancestor of the other and they have the same root,
105 // then we say NoAlias.
106 //
107 // TODO: The current metadata format doesn't support struct
108 // fields. For example:
109 //   struct X {
110 //     double d;
111 //     int i;
112 //   };
113 //   void foo(struct X *x, struct X *y, double *p) {
114 //     *x = *y;
115 //     *p = 0.0;
116 //   }
117 // Struct X has a double member, so the store to *x can alias the store to *p.
118 // Currently it's not possible to precisely describe all the things struct X
119 // aliases, so struct assignments must use conservative TBAA nodes. There's
120 // no scheme for attaching metadata to @llvm.memcpy yet either.
121 //
122 //===----------------------------------------------------------------------===//
123 
124 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
125 #include "llvm/ADT/SetVector.h"
126 #include "llvm/Analysis/AliasAnalysis.h"
127 #include "llvm/Analysis/MemoryLocation.h"
128 #include "llvm/IR/Constants.h"
129 #include "llvm/IR/DerivedTypes.h"
130 #include "llvm/IR/Instruction.h"
131 #include "llvm/IR/LLVMContext.h"
132 #include "llvm/IR/Metadata.h"
133 #include "llvm/Pass.h"
134 #include "llvm/Support/Casting.h"
135 #include "llvm/Support/CommandLine.h"
136 #include "llvm/Support/ErrorHandling.h"
137 #include <cassert>
138 #include <cstdint>
139 
140 using namespace llvm;
141 
142 // A handy option for disabling TBAA functionality. The same effect can also be
143 // achieved by stripping the !tbaa tags from IR, but this option is sometimes
144 // more convenient.
145 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
146 
147 namespace {
148 
149 /// This is a simple wrapper around an MDNode which provides a higher-level
150 /// interface by hiding the details of how alias analysis information is encoded
151 /// in its operands.
152 template<typename MDNodeTy>
153 class TBAANodeImpl {
154   MDNodeTy *Node = nullptr;
155 
156 public:
157   TBAANodeImpl() = default;
158   explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
159 
160   /// getNode - Get the MDNode for this TBAANode.
161   MDNodeTy *getNode() const { return Node; }
162 
163   /// getParent - Get this TBAANode's Alias tree parent.
164   TBAANodeImpl<MDNodeTy> getParent() const {
165     if (Node->getNumOperands() < 2)
166       return TBAANodeImpl<MDNodeTy>();
167     MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
168     if (!P)
169       return TBAANodeImpl<MDNodeTy>();
170     // Ok, this node has a valid parent. Return it.
171     return TBAANodeImpl<MDNodeTy>(P);
172   }
173 
174   /// Test if this TBAANode represents a type for objects which are
175   /// not modified (by any means) in the context where this
176   /// AliasAnalysis is relevant.
177   bool isTypeImmutable() const {
178     if (Node->getNumOperands() < 3)
179       return false;
180     ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
181     if (!CI)
182       return false;
183     return CI->getValue()[0];
184   }
185 };
186 
187 /// \name Specializations of \c TBAANodeImpl for const and non const qualified
188 /// \c MDNode.
189 /// @{
190 using TBAANode = TBAANodeImpl<const MDNode>;
191 using MutableTBAANode = TBAANodeImpl<MDNode>;
192 /// @}
193 
194 /// This is a simple wrapper around an MDNode which provides a
195 /// higher-level interface by hiding the details of how alias analysis
196 /// information is encoded in its operands.
197 template<typename MDNodeTy>
198 class TBAAStructTagNodeImpl {
199   /// This node should be created with createTBAAStructTagNode.
200   MDNodeTy *Node;
201 
202 public:
203   explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {}
204 
205   /// Get the MDNode for this TBAAStructTagNode.
206   MDNodeTy *getNode() const { return Node; }
207 
208   MDNodeTy *getBaseType() const {
209     return dyn_cast_or_null<MDNode>(Node->getOperand(0));
210   }
211 
212   MDNodeTy *getAccessType() const {
213     return dyn_cast_or_null<MDNode>(Node->getOperand(1));
214   }
215 
216   uint64_t getOffset() const {
217     return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
218   }
219 
220   /// Test if this TBAAStructTagNode represents a type for objects
221   /// which are not modified (by any means) in the context where this
222   /// AliasAnalysis is relevant.
223   bool isTypeImmutable() const {
224     if (Node->getNumOperands() < 4)
225       return false;
226     ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
227     if (!CI)
228       return false;
229     return CI->getValue()[0];
230   }
231 };
232 
233 /// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
234 /// qualified \c MDNods.
235 /// @{
236 using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>;
237 using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>;
238 /// @}
239 
240 /// This is a simple wrapper around an MDNode which provides a
241 /// higher-level interface by hiding the details of how alias analysis
242 /// information is encoded in its operands.
243 class TBAAStructTypeNode {
244   /// This node should be created with createTBAAStructTypeNode.
245   const MDNode *Node = nullptr;
246 
247 public:
248   TBAAStructTypeNode() = default;
249   explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
250 
251   /// Get the MDNode for this TBAAStructTypeNode.
252   const MDNode *getNode() const { return Node; }
253 
254   /// Get this TBAAStructTypeNode's field in the type DAG with
255   /// given offset. Update the offset to be relative to the field type.
256   TBAAStructTypeNode getParent(uint64_t &Offset) const {
257     // Parent can be omitted for the root node.
258     if (Node->getNumOperands() < 2)
259       return TBAAStructTypeNode();
260 
261     // Fast path for a scalar type node and a struct type node with a single
262     // field.
263     if (Node->getNumOperands() <= 3) {
264       uint64_t Cur = Node->getNumOperands() == 2
265                          ? 0
266                          : mdconst::extract<ConstantInt>(Node->getOperand(2))
267                                ->getZExtValue();
268       Offset -= Cur;
269       MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
270       if (!P)
271         return TBAAStructTypeNode();
272       return TBAAStructTypeNode(P);
273     }
274 
275     // Assume the offsets are in order. We return the previous field if
276     // the current offset is bigger than the given offset.
277     unsigned TheIdx = 0;
278     for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) {
279       uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1))
280                          ->getZExtValue();
281       if (Cur > Offset) {
282         assert(Idx >= 3 &&
283                "TBAAStructTypeNode::getParent should have an offset match!");
284         TheIdx = Idx - 2;
285         break;
286       }
287     }
288     // Move along the last field.
289     if (TheIdx == 0)
290       TheIdx = Node->getNumOperands() - 2;
291     uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1))
292                        ->getZExtValue();
293     Offset -= Cur;
294     MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx));
295     if (!P)
296       return TBAAStructTypeNode();
297     return TBAAStructTypeNode(P);
298   }
299 };
300 
301 } // end anonymous namespace
302 
303 /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat
304 /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA
305 /// format.
306 static bool isStructPathTBAA(const MDNode *MD) {
307   // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
308   // a TBAA tag.
309   return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
310 }
311 
312 AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
313                                      const MemoryLocation &LocB) {
314   if (!EnableTBAA)
315     return AAResultBase::alias(LocA, LocB);
316 
317   // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
318   // be conservative.
319   const MDNode *AM = LocA.AATags.TBAA;
320   if (!AM)
321     return AAResultBase::alias(LocA, LocB);
322   const MDNode *BM = LocB.AATags.TBAA;
323   if (!BM)
324     return AAResultBase::alias(LocA, LocB);
325 
326   // If they may alias, chain to the next AliasAnalysis.
327   if (Aliases(AM, BM))
328     return AAResultBase::alias(LocA, LocB);
329 
330   // Otherwise return a definitive result.
331   return NoAlias;
332 }
333 
334 bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
335                                                bool OrLocal) {
336   if (!EnableTBAA)
337     return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
338 
339   const MDNode *M = Loc.AATags.TBAA;
340   if (!M)
341     return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
342 
343   // If this is an "immutable" type, we can assume the pointer is pointing
344   // to constant memory.
345   if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
346       (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
347     return true;
348 
349   return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
350 }
351 
352 FunctionModRefBehavior
353 TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
354   if (!EnableTBAA)
355     return AAResultBase::getModRefBehavior(CS);
356 
357   FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;
358 
359   // If this is an "immutable" type, we can assume the call doesn't write
360   // to memory.
361   if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
362     if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
363         (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
364       Min = FMRB_OnlyReadsMemory;
365 
366   return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
367 }
368 
369 FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
370   // Functions don't have metadata. Just chain to the next implementation.
371   return AAResultBase::getModRefBehavior(F);
372 }
373 
374 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS,
375                                             const MemoryLocation &Loc) {
376   if (!EnableTBAA)
377     return AAResultBase::getModRefInfo(CS, Loc);
378 
379   if (const MDNode *L = Loc.AATags.TBAA)
380     if (const MDNode *M =
381             CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
382       if (!Aliases(L, M))
383         return MRI_NoModRef;
384 
385   return AAResultBase::getModRefInfo(CS, Loc);
386 }
387 
388 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1,
389                                             ImmutableCallSite CS2) {
390   if (!EnableTBAA)
391     return AAResultBase::getModRefInfo(CS1, CS2);
392 
393   if (const MDNode *M1 =
394           CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
395     if (const MDNode *M2 =
396             CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
397       if (!Aliases(M1, M2))
398         return MRI_NoModRef;
399 
400   return AAResultBase::getModRefInfo(CS1, CS2);
401 }
402 
403 bool MDNode::isTBAAVtableAccess() const {
404   if (!isStructPathTBAA(this)) {
405     if (getNumOperands() < 1)
406       return false;
407     if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
408       if (Tag1->getString() == "vtable pointer")
409         return true;
410     }
411     return false;
412   }
413 
414   // For struct-path aware TBAA, we use the access type of the tag.
415   if (getNumOperands() < 2)
416     return false;
417   MDNode *Tag = cast_or_null<MDNode>(getOperand(1));
418   if (!Tag)
419     return false;
420   if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) {
421     if (Tag1->getString() == "vtable pointer")
422       return true;
423   }
424   return false;
425 }
426 
427 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
428   if (!A || !B)
429     return nullptr;
430 
431   if (A == B)
432     return A;
433 
434   // For struct-path aware TBAA, we use the access type of the tag.
435   assert(isStructPathTBAA(A) && isStructPathTBAA(B) &&
436          "Auto upgrade should have taken care of this!");
437   A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType());
438   if (!A)
439     return nullptr;
440   B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType());
441   if (!B)
442     return nullptr;
443 
444   SmallSetVector<MDNode *, 4> PathA;
445   MutableTBAANode TA(A);
446   while (TA.getNode()) {
447     if (PathA.count(TA.getNode()))
448       report_fatal_error("Cycle found in TBAA metadata.");
449     PathA.insert(TA.getNode());
450     TA = TA.getParent();
451   }
452 
453   SmallSetVector<MDNode *, 4> PathB;
454   MutableTBAANode TB(B);
455   while (TB.getNode()) {
456     if (PathB.count(TB.getNode()))
457       report_fatal_error("Cycle found in TBAA metadata.");
458     PathB.insert(TB.getNode());
459     TB = TB.getParent();
460   }
461 
462   int IA = PathA.size() - 1;
463   int IB = PathB.size() - 1;
464 
465   MDNode *Ret = nullptr;
466   while (IA >= 0 && IB >= 0) {
467     if (PathA[IA] == PathB[IB])
468       Ret = PathA[IA];
469     else
470       break;
471     --IA;
472     --IB;
473   }
474 
475   // We either did not find a match, or the only common base "type" is
476   // the root node.  In either case, we don't have any useful TBAA
477   // metadata to attach.
478   if (!Ret || Ret->getNumOperands() < 2)
479     return nullptr;
480 
481   // We need to convert from a type node to a tag node.
482   Type *Int64 = IntegerType::get(A->getContext(), 64);
483   Metadata *Ops[3] = {Ret, Ret,
484                       ConstantAsMetadata::get(ConstantInt::get(Int64, 0))};
485   return MDNode::get(A->getContext(), Ops);
486 }
487 
488 void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
489   if (Merge)
490     N.TBAA =
491         MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa));
492   else
493     N.TBAA = getMetadata(LLVMContext::MD_tbaa);
494 
495   if (Merge)
496     N.Scope = MDNode::getMostGenericAliasScope(
497         N.Scope, getMetadata(LLVMContext::MD_alias_scope));
498   else
499     N.Scope = getMetadata(LLVMContext::MD_alias_scope);
500 
501   if (Merge)
502     N.NoAlias =
503         MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias));
504   else
505     N.NoAlias = getMetadata(LLVMContext::MD_noalias);
506 }
507 
508 /// Aliases - Test whether the type represented by A may alias the
509 /// type represented by B.
510 bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
511   // Verify that both input nodes are struct-path aware.  Auto-upgrade should
512   // have taken care of this.
513   assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
514   assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
515 
516   // Keep track of the root node for A and B.
517   TBAAStructTypeNode RootA, RootB;
518   TBAAStructTagNode TagA(A), TagB(B);
519 
520   // TODO: We need to check if AccessType of TagA encloses AccessType of
521   // TagB to support aggregate AccessType. If yes, return true.
522 
523   // Start from the base type of A, follow the edge with the correct offset in
524   // the type DAG and adjust the offset until we reach the base type of B or
525   // until we reach the Root node.
526   // Compare the adjusted offset once we have the same base.
527 
528   // Climb the type DAG from base type of A to see if we reach base type of B.
529   const MDNode *BaseA = TagA.getBaseType();
530   const MDNode *BaseB = TagB.getBaseType();
531   uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset();
532   for (TBAAStructTypeNode T(BaseA);;) {
533     if (T.getNode() == BaseB)
534       // Base type of A encloses base type of B, check if the offsets match.
535       return OffsetA == OffsetB;
536 
537     RootA = T;
538     // Follow the edge with the correct offset, OffsetA will be adjusted to
539     // be relative to the field type.
540     T = T.getParent(OffsetA);
541     if (!T.getNode())
542       break;
543   }
544 
545   // Reset OffsetA and climb the type DAG from base type of B to see if we reach
546   // base type of A.
547   OffsetA = TagA.getOffset();
548   for (TBAAStructTypeNode T(BaseB);;) {
549     if (T.getNode() == BaseA)
550       // Base type of B encloses base type of A, check if the offsets match.
551       return OffsetA == OffsetB;
552 
553     RootB = T;
554     // Follow the edge with the correct offset, OffsetB will be adjusted to
555     // be relative to the field type.
556     T = T.getParent(OffsetB);
557     if (!T.getNode())
558       break;
559   }
560 
561   // Neither node is an ancestor of the other.
562 
563   // If they have different roots, they're part of different potentially
564   // unrelated type systems, so we must be conservative.
565   if (RootA.getNode() != RootB.getNode())
566     return true;
567 
568   // If they have the same root, then we've proved there's no alias.
569   return false;
570 }
571 
572 AnalysisKey TypeBasedAA::Key;
573 
574 TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
575   return TypeBasedAAResult();
576 }
577 
578 char TypeBasedAAWrapperPass::ID = 0;
579 INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
580                 false, true)
581 
582 ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
583   return new TypeBasedAAWrapperPass();
584 }
585 
586 TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
587   initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
588 }
589 
590 bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
591   Result.reset(new TypeBasedAAResult());
592   return false;
593 }
594 
595 bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
596   Result.reset();
597   return false;
598 }
599 
600 void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
601   AU.setPreservesAll();
602 }
603