//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the TypeBasedAliasAnalysis pass, which implements
// metadata-based TBAA.
//
// In LLVM IR, memory does not have types, so LLVM's own type system is not
// suitable for doing TBAA. Instead, metadata is added to the IR to describe
// a type system of a higher level language. This can be used to implement
// typical C/C++ TBAA, but it can also be used to implement custom alias
// analysis behavior for other languages.
//
// We now support two types of metadata format: scalar TBAA and struct-path
// aware TBAA. After all test cases are upgraded to use struct-path aware
// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
// can be dropped.
//
// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
//   !0 = !{ !"an example type tree" }
//   !1 = !{ !"int", !0 }
//   !2 = !{ !"float", !0 }
//   !3 = !{ !"const float", !2, i64 1 }
//
// The first field is an identity field. It can be any value, usually
// an MDString, which uniquely identifies the type. The most important
// name in the tree is the name of the root node. Two trees with
// different root node names are entirely disjoint, even if they
// have leaves with common names.
//
// The second field identifies the type's parent node in the tree, or
// is null or omitted for a root node. A type is considered to alias
// all of its descendants and all of its ancestors in the tree. Also,
// a type is considered to alias all types in other trees, so that
// bitcode produced from multiple front-ends is handled conservatively.
//
// If the third field is present, it's an integer which if equal to 1
// indicates that the type is "constant" (meaning pointsToConstantMemory
// should return true; see
// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
//
// With struct-path aware TBAA, the MDNodes attached to an instruction using
// "!tbaa" are called path tag nodes.
//
// The path tag node has 4 fields with the last field being optional.
//
// The first field is the base type node, it can be a struct type node
// or a scalar type node. The second field is the access type node, it
// must be a scalar type node. The third field is the offset into the base type.
// The last field has the same meaning as the last field of our scalar TBAA:
// it's an integer which if equal to 1 indicates that the access is "constant".
//
// The struct type node has a name and a list of pairs, one pair for each member
// of the struct. The first element of each pair is a type node (a struct type
// node or a scalar type node), specifying the type of the member, the second
// element of each pair is the offset of the member.
//
// Given the example
//   typedef struct {
//     short s;
//   } A;
//   typedef struct {
//     uint16_t s;
//     A a;
//   } B;
//
// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
//   !0 = !{!"Simple C/C++ TBAA"}
//   !1 = !{!"omnipotent char", !0}       // Scalar type node
//   !2 = !{!"short", !1}                 // Scalar type node
//   !3 = !{!"A", !2, i64 0}              // Struct type node
//   !4 = !{!"B", !2, i64 0, !3, i64 4}   // Struct type node
//   !5 = !{!4, !2, i64 4}                // Path tag node
//
// The struct type nodes and the scalar type nodes form a type DAG.
//         Root (!0)
//         char (!1)  -- edge to Root
//         short (!2) -- edge to char
//         A (!3) -- edge with offset 0 to short
//         B (!4) -- edge with offset 0 to short and edge with offset 4 to A
//
// To check if two tags (tagX and tagY) can alias, we start from the base type
// of tagX, follow the edge with the correct offset in the type DAG and adjust
// the offset until we reach the base type of tagY or until we reach the Root
// node.
// If we reach the base type of tagY, compare the adjusted offset with
// offset of tagY, return Alias if the offsets are the same, return NoAlias
// otherwise.
// If we reach the Root node, perform the above starting from base type of tagY
// to see if we reach base type of tagX.
//
// If they have different roots, they're part of different potentially
// unrelated type systems, so we return Alias to be conservative.
// If neither node is an ancestor of the other and they have the same root,
// then we say NoAlias.
//
// TODO: The current metadata format doesn't support struct
// fields. For example:
//   struct X {
//     double d;
//     int i;
//   };
//   void foo(struct X *x, struct X *y, double *p) {
//     *x = *y;
//     *p = 0.0;
//   }
// Struct X has a double member, so the store to *x can alias the store to *p.
// Currently it's not possible to precisely describe all the things struct X
// aliases, so struct assignments must use conservative TBAA nodes. There's
// no scheme for attaching metadata to @llvm.memcpy yet either.
121 // 122 //===----------------------------------------------------------------------===// 123 124 #include "llvm/Analysis/TypeBasedAliasAnalysis.h" 125 #include "llvm/ADT/SetVector.h" 126 #include "llvm/Analysis/AliasAnalysis.h" 127 #include "llvm/Analysis/MemoryLocation.h" 128 #include "llvm/IR/Constants.h" 129 #include "llvm/IR/DerivedTypes.h" 130 #include "llvm/IR/Instruction.h" 131 #include "llvm/IR/LLVMContext.h" 132 #include "llvm/IR/Metadata.h" 133 #include "llvm/Pass.h" 134 #include "llvm/Support/Casting.h" 135 #include "llvm/Support/CommandLine.h" 136 #include "llvm/Support/ErrorHandling.h" 137 #include <cassert> 138 #include <cstdint> 139 140 using namespace llvm; 141 142 // A handy option for disabling TBAA functionality. The same effect can also be 143 // achieved by stripping the !tbaa tags from IR, but this option is sometimes 144 // more convenient. 145 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); 146 147 namespace { 148 149 /// This is a simple wrapper around an MDNode which provides a higher-level 150 /// interface by hiding the details of how alias analysis information is encoded 151 /// in its operands. 152 template<typename MDNodeTy> 153 class TBAANodeImpl { 154 MDNodeTy *Node = nullptr; 155 156 public: 157 TBAANodeImpl() = default; 158 explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {} 159 160 /// getNode - Get the MDNode for this TBAANode. 161 MDNodeTy *getNode() const { return Node; } 162 163 /// getParent - Get this TBAANode's Alias tree parent. 164 TBAANodeImpl<MDNodeTy> getParent() const { 165 if (Node->getNumOperands() < 2) 166 return TBAANodeImpl<MDNodeTy>(); 167 MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1)); 168 if (!P) 169 return TBAANodeImpl<MDNodeTy>(); 170 // Ok, this node has a valid parent. Return it. 
171 return TBAANodeImpl<MDNodeTy>(P); 172 } 173 174 /// Test if this TBAANode represents a type for objects which are 175 /// not modified (by any means) in the context where this 176 /// AliasAnalysis is relevant. 177 bool isTypeImmutable() const { 178 if (Node->getNumOperands() < 3) 179 return false; 180 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); 181 if (!CI) 182 return false; 183 return CI->getValue()[0]; 184 } 185 }; 186 187 /// \name Specializations of \c TBAANodeImpl for const and non const qualified 188 /// \c MDNode. 189 /// @{ 190 using TBAANode = TBAANodeImpl<const MDNode>; 191 using MutableTBAANode = TBAANodeImpl<MDNode>; 192 /// @} 193 194 /// This is a simple wrapper around an MDNode which provides a 195 /// higher-level interface by hiding the details of how alias analysis 196 /// information is encoded in its operands. 197 template<typename MDNodeTy> 198 class TBAAStructTagNodeImpl { 199 /// This node should be created with createTBAAStructTagNode. 200 MDNodeTy *Node; 201 202 public: 203 explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {} 204 205 /// Get the MDNode for this TBAAStructTagNode. 206 MDNodeTy *getNode() const { return Node; } 207 208 MDNodeTy *getBaseType() const { 209 return dyn_cast_or_null<MDNode>(Node->getOperand(0)); 210 } 211 212 MDNodeTy *getAccessType() const { 213 return dyn_cast_or_null<MDNode>(Node->getOperand(1)); 214 } 215 216 uint64_t getOffset() const { 217 return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); 218 } 219 220 /// Test if this TBAAStructTagNode represents a type for objects 221 /// which are not modified (by any means) in the context where this 222 /// AliasAnalysis is relevant. 
223 bool isTypeImmutable() const { 224 if (Node->getNumOperands() < 4) 225 return false; 226 ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); 227 if (!CI) 228 return false; 229 return CI->getValue()[0]; 230 } 231 }; 232 233 /// \name Specializations of \c TBAAStructTagNodeImpl for const and non const 234 /// qualified \c MDNods. 235 /// @{ 236 using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>; 237 using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>; 238 /// @} 239 240 /// This is a simple wrapper around an MDNode which provides a 241 /// higher-level interface by hiding the details of how alias analysis 242 /// information is encoded in its operands. 243 class TBAAStructTypeNode { 244 /// This node should be created with createTBAAStructTypeNode. 245 const MDNode *Node = nullptr; 246 247 public: 248 TBAAStructTypeNode() = default; 249 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} 250 251 /// Get the MDNode for this TBAAStructTypeNode. 252 const MDNode *getNode() const { return Node; } 253 254 /// Get this TBAAStructTypeNode's field in the type DAG with 255 /// given offset. Update the offset to be relative to the field type. 256 TBAAStructTypeNode getParent(uint64_t &Offset) const { 257 // Parent can be omitted for the root node. 258 if (Node->getNumOperands() < 2) 259 return TBAAStructTypeNode(); 260 261 // Fast path for a scalar type node and a struct type node with a single 262 // field. 263 if (Node->getNumOperands() <= 3) { 264 uint64_t Cur = Node->getNumOperands() == 2 265 ? 0 266 : mdconst::extract<ConstantInt>(Node->getOperand(2)) 267 ->getZExtValue(); 268 Offset -= Cur; 269 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 270 if (!P) 271 return TBAAStructTypeNode(); 272 return TBAAStructTypeNode(P); 273 } 274 275 // Assume the offsets are in order. We return the previous field if 276 // the current offset is bigger than the given offset. 
277 unsigned TheIdx = 0; 278 for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { 279 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) 280 ->getZExtValue(); 281 if (Cur > Offset) { 282 assert(Idx >= 3 && 283 "TBAAStructTypeNode::getParent should have an offset match!"); 284 TheIdx = Idx - 2; 285 break; 286 } 287 } 288 // Move along the last field. 289 if (TheIdx == 0) 290 TheIdx = Node->getNumOperands() - 2; 291 uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) 292 ->getZExtValue(); 293 Offset -= Cur; 294 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); 295 if (!P) 296 return TBAAStructTypeNode(); 297 return TBAAStructTypeNode(P); 298 } 299 }; 300 301 } // end anonymous namespace 302 303 /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat 304 /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA 305 /// format. 306 static bool isStructPathTBAA(const MDNode *MD) { 307 // Anonymous TBAA root starts with a MDNode and dragonegg uses it as 308 // a TBAA tag. 309 return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; 310 } 311 312 AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, 313 const MemoryLocation &LocB) { 314 if (!EnableTBAA) 315 return AAResultBase::alias(LocA, LocB); 316 317 // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must 318 // be conservative. 319 const MDNode *AM = LocA.AATags.TBAA; 320 if (!AM) 321 return AAResultBase::alias(LocA, LocB); 322 const MDNode *BM = LocB.AATags.TBAA; 323 if (!BM) 324 return AAResultBase::alias(LocA, LocB); 325 326 // If they may alias, chain to the next AliasAnalysis. 327 if (Aliases(AM, BM)) 328 return AAResultBase::alias(LocA, LocB); 329 330 // Otherwise return a definitive result. 
331 return NoAlias; 332 } 333 334 bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, 335 bool OrLocal) { 336 if (!EnableTBAA) 337 return AAResultBase::pointsToConstantMemory(Loc, OrLocal); 338 339 const MDNode *M = Loc.AATags.TBAA; 340 if (!M) 341 return AAResultBase::pointsToConstantMemory(Loc, OrLocal); 342 343 // If this is an "immutable" type, we can assume the pointer is pointing 344 // to constant memory. 345 if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) || 346 (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable())) 347 return true; 348 349 return AAResultBase::pointsToConstantMemory(Loc, OrLocal); 350 } 351 352 FunctionModRefBehavior 353 TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) { 354 if (!EnableTBAA) 355 return AAResultBase::getModRefBehavior(CS); 356 357 FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; 358 359 // If this is an "immutable" type, we can assume the call doesn't write 360 // to memory. 361 if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 362 if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) || 363 (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable())) 364 Min = FMRB_OnlyReadsMemory; 365 366 return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); 367 } 368 369 FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) { 370 // Functions don't have metadata. Just chain to the next implementation. 
371 return AAResultBase::getModRefBehavior(F); 372 } 373 374 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, 375 const MemoryLocation &Loc) { 376 if (!EnableTBAA) 377 return AAResultBase::getModRefInfo(CS, Loc); 378 379 if (const MDNode *L = Loc.AATags.TBAA) 380 if (const MDNode *M = 381 CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 382 if (!Aliases(L, M)) 383 return MRI_NoModRef; 384 385 return AAResultBase::getModRefInfo(CS, Loc); 386 } 387 388 ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, 389 ImmutableCallSite CS2) { 390 if (!EnableTBAA) 391 return AAResultBase::getModRefInfo(CS1, CS2); 392 393 if (const MDNode *M1 = 394 CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 395 if (const MDNode *M2 = 396 CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 397 if (!Aliases(M1, M2)) 398 return MRI_NoModRef; 399 400 return AAResultBase::getModRefInfo(CS1, CS2); 401 } 402 403 bool MDNode::isTBAAVtableAccess() const { 404 if (!isStructPathTBAA(this)) { 405 if (getNumOperands() < 1) 406 return false; 407 if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { 408 if (Tag1->getString() == "vtable pointer") 409 return true; 410 } 411 return false; 412 } 413 414 // For struct-path aware TBAA, we use the access type of the tag. 415 if (getNumOperands() < 2) 416 return false; 417 MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); 418 if (!Tag) 419 return false; 420 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { 421 if (Tag1->getString() == "vtable pointer") 422 return true; 423 } 424 return false; 425 } 426 427 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { 428 if (!A || !B) 429 return nullptr; 430 431 if (A == B) 432 return A; 433 434 // For struct-path aware TBAA, we use the access type of the tag. 
435 assert(isStructPathTBAA(A) && isStructPathTBAA(B) && 436 "Auto upgrade should have taken care of this!"); 437 A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType()); 438 if (!A) 439 return nullptr; 440 B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType()); 441 if (!B) 442 return nullptr; 443 444 SmallSetVector<MDNode *, 4> PathA; 445 MutableTBAANode TA(A); 446 while (TA.getNode()) { 447 if (PathA.count(TA.getNode())) 448 report_fatal_error("Cycle found in TBAA metadata."); 449 PathA.insert(TA.getNode()); 450 TA = TA.getParent(); 451 } 452 453 SmallSetVector<MDNode *, 4> PathB; 454 MutableTBAANode TB(B); 455 while (TB.getNode()) { 456 if (PathB.count(TB.getNode())) 457 report_fatal_error("Cycle found in TBAA metadata."); 458 PathB.insert(TB.getNode()); 459 TB = TB.getParent(); 460 } 461 462 int IA = PathA.size() - 1; 463 int IB = PathB.size() - 1; 464 465 MDNode *Ret = nullptr; 466 while (IA >= 0 && IB >= 0) { 467 if (PathA[IA] == PathB[IB]) 468 Ret = PathA[IA]; 469 else 470 break; 471 --IA; 472 --IB; 473 } 474 475 // We either did not find a match, or the only common base "type" is 476 // the root node. In either case, we don't have any useful TBAA 477 // metadata to attach. 478 if (!Ret || Ret->getNumOperands() < 2) 479 return nullptr; 480 481 // We need to convert from a type node to a tag node. 
482 Type *Int64 = IntegerType::get(A->getContext(), 64); 483 Metadata *Ops[3] = {Ret, Ret, 484 ConstantAsMetadata::get(ConstantInt::get(Int64, 0))}; 485 return MDNode::get(A->getContext(), Ops); 486 } 487 488 void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const { 489 if (Merge) 490 N.TBAA = 491 MDNode::getMostGenericTBAA(N.TBAA, getMetadata(LLVMContext::MD_tbaa)); 492 else 493 N.TBAA = getMetadata(LLVMContext::MD_tbaa); 494 495 if (Merge) 496 N.Scope = MDNode::getMostGenericAliasScope( 497 N.Scope, getMetadata(LLVMContext::MD_alias_scope)); 498 else 499 N.Scope = getMetadata(LLVMContext::MD_alias_scope); 500 501 if (Merge) 502 N.NoAlias = 503 MDNode::intersect(N.NoAlias, getMetadata(LLVMContext::MD_noalias)); 504 else 505 N.NoAlias = getMetadata(LLVMContext::MD_noalias); 506 } 507 508 /// Aliases - Test whether the type represented by A may alias the 509 /// type represented by B. 510 bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { 511 // Verify that both input nodes are struct-path aware. Auto-upgrade should 512 // have taken care of this. 513 assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); 514 assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); 515 516 // Keep track of the root node for A and B. 517 TBAAStructTypeNode RootA, RootB; 518 TBAAStructTagNode TagA(A), TagB(B); 519 520 // TODO: We need to check if AccessType of TagA encloses AccessType of 521 // TagB to support aggregate AccessType. If yes, return true. 522 523 // Start from the base type of A, follow the edge with the correct offset in 524 // the type DAG and adjust the offset until we reach the base type of B or 525 // until we reach the Root node. 526 // Compare the adjusted offset once we have the same base. 527 528 // Climb the type DAG from base type of A to see if we reach base type of B. 
529 const MDNode *BaseA = TagA.getBaseType(); 530 const MDNode *BaseB = TagB.getBaseType(); 531 uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); 532 for (TBAAStructTypeNode T(BaseA);;) { 533 if (T.getNode() == BaseB) 534 // Base type of A encloses base type of B, check if the offsets match. 535 return OffsetA == OffsetB; 536 537 RootA = T; 538 // Follow the edge with the correct offset, OffsetA will be adjusted to 539 // be relative to the field type. 540 T = T.getParent(OffsetA); 541 if (!T.getNode()) 542 break; 543 } 544 545 // Reset OffsetA and climb the type DAG from base type of B to see if we reach 546 // base type of A. 547 OffsetA = TagA.getOffset(); 548 for (TBAAStructTypeNode T(BaseB);;) { 549 if (T.getNode() == BaseA) 550 // Base type of B encloses base type of A, check if the offsets match. 551 return OffsetA == OffsetB; 552 553 RootB = T; 554 // Follow the edge with the correct offset, OffsetB will be adjusted to 555 // be relative to the field type. 556 T = T.getParent(OffsetB); 557 if (!T.getNode()) 558 break; 559 } 560 561 // Neither node is an ancestor of the other. 562 563 // If they have different roots, they're part of different potentially 564 // unrelated type systems, so we must be conservative. 565 if (RootA.getNode() != RootB.getNode()) 566 return true; 567 568 // If they have the same root, then we've proved there's no alias. 
569 return false; 570 } 571 572 AnalysisKey TypeBasedAA::Key; 573 574 TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) { 575 return TypeBasedAAResult(); 576 } 577 578 char TypeBasedAAWrapperPass::ID = 0; 579 INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis", 580 false, true) 581 582 ImmutablePass *llvm::createTypeBasedAAWrapperPass() { 583 return new TypeBasedAAWrapperPass(); 584 } 585 586 TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) { 587 initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry()); 588 } 589 590 bool TypeBasedAAWrapperPass::doInitialization(Module &M) { 591 Result.reset(new TypeBasedAAResult()); 592 return false; 593 } 594 595 bool TypeBasedAAWrapperPass::doFinalization(Module &M) { 596 Result.reset(); 597 return false; 598 } 599 600 void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 601 AU.setPreservesAll(); 602 } 603