//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the TypeBasedAliasAnalysis pass, which implements
// metadata-based TBAA.
//
// In LLVM IR, memory does not have types, so LLVM's own type system is not
// suitable for doing TBAA. Instead, metadata is added to the IR to describe
// a type system of a higher level language. This can be used to implement
// typical C/C++ TBAA, but it can also be used to implement custom alias
// analysis behavior for other languages.
//
// We now support two types of metadata format: scalar TBAA and struct-path
// aware TBAA. After all test cases are upgraded to use struct-path aware
// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
// can be dropped.
//
// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
// three fields, e.g.:
//   !0 = metadata !{ metadata !"an example type tree" }
//   !1 = metadata !{ metadata !"int", metadata !0 }
//   !2 = metadata !{ metadata !"float", metadata !0 }
//   !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
//
// The first field is an identity field. It can be any value, usually
// an MDString, which uniquely identifies the type. The most important
// name in the tree is the name of the root node. Two trees with
// different root node names are entirely disjoint, even if they
// have leaves with common names.
//
// The second field identifies the type's parent node in the tree, or
// is null or omitted for a root node. A type is considered to alias
// all of its descendants and all of its ancestors in the tree.
// Also, a type is considered to alias all types in other trees, so that
// bitcode produced from multiple front-ends is handled conservatively.
//
// If the third field is present, it's an integer which if equal to 1
// indicates that the type is "constant" (meaning pointsToConstantMemory
// should return true; see
// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
//
// With struct-path aware TBAA, the MDNodes attached to an instruction using
// "!tbaa" are called path tag nodes.
//
// The path tag node has 4 fields with the last field being optional.
//
// The first field is the base type node; it can be a struct type node
// or a scalar type node. The second field is the access type node; it
// must be a scalar type node. The third field is the offset into the base type.
// The last field has the same meaning as the last field of our scalar TBAA:
// it's an integer which if equal to 1 indicates that the access is "constant".
//
// The struct type node has a name and a list of pairs, one pair for each member
// of the struct. The first element of each pair is a type node (a struct type
// node or a scalar type node), specifying the type of the member; the second
// element of each pair is the offset of the member.
//
// Given an example
//  typedef struct {
//    short s;
//  } A;
//  typedef struct {
//    uint16_t s;
//    A a;
//  } B;
//
// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
//   !0 = metadata !{metadata !"Simple C/C++ TBAA"}
//   !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node
//   !2 = metadata !{metadata !"short", metadata !1}           // Scalar type node
//   !3 = metadata !{metadata !"A", metadata !2, i64 0}        // Struct type node
//   !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4}
//                                                             // Struct type node
//   !5 = metadata !{metadata !4, metadata !2, i64 4}          // Path tag node
//
// The struct type nodes and the scalar type nodes form a type DAG.
//         Root (!0)
//         char (!1)  -- edge to Root
//         short (!2) -- edge to char
//         A (!3) -- edge with offset 0 to short
//         B (!4) -- edge with offset 0 to short and edge with offset 4 to A
//
// To check if two tags (tagX and tagY) can alias, we start from the base type
// of tagX, follow the edge with the correct offset in the type DAG and adjust
// the offset until we reach the base type of tagY or until we reach the Root
// node.
// If we reach the base type of tagY, compare the adjusted offset with
// offset of tagY, return Alias if the offsets are the same, return NoAlias
// otherwise.
// If we reach the Root node, perform the above starting from base type of tagY
// to see if we reach base type of tagX.
//
// If they have different roots, they're part of different potentially
// unrelated type systems, so we return Alias to be conservative.
// If neither node is an ancestor of the other and they have the same root,
// then we say NoAlias.
//
// TODO: The current metadata format doesn't support struct
// fields. For example:
//   struct X {
//     double d;
//     int i;
//   };
//   void foo(struct X *x, struct X *y, double *p) {
//     *x = *y;
//     *p = 0.0;
//   }
// Struct X has a double member, so the store to *x can alias the store to *p.
118 // Currently it's not possible to precisely describe all the things struct X 119 // aliases, so struct assignments must use conservative TBAA nodes. There's 120 // no scheme for attaching metadata to @llvm.memcpy yet either. 121 // 122 //===----------------------------------------------------------------------===// 123 124 #include "llvm/Analysis/Passes.h" 125 #include "llvm/Analysis/AliasAnalysis.h" 126 #include "llvm/IR/Constants.h" 127 #include "llvm/IR/LLVMContext.h" 128 #include "llvm/IR/Metadata.h" 129 #include "llvm/IR/Module.h" 130 #include "llvm/Pass.h" 131 #include "llvm/Support/CommandLine.h" 132 using namespace llvm; 133 134 // A handy option for disabling TBAA functionality. The same effect can also be 135 // achieved by stripping the !tbaa tags from IR, but this option is sometimes 136 // more convenient. 137 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true)); 138 139 namespace { 140 /// TBAANode - This is a simple wrapper around an MDNode which provides a 141 /// higher-level interface by hiding the details of how alias analysis 142 /// information is encoded in its operands. 143 class TBAANode { 144 const MDNode *Node; 145 146 public: 147 TBAANode() : Node(0) {} 148 explicit TBAANode(const MDNode *N) : Node(N) {} 149 150 /// getNode - Get the MDNode for this TBAANode. 151 const MDNode *getNode() const { return Node; } 152 153 /// getParent - Get this TBAANode's Alias tree parent. 154 TBAANode getParent() const { 155 if (Node->getNumOperands() < 2) 156 return TBAANode(); 157 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 158 if (!P) 159 return TBAANode(); 160 // Ok, this node has a valid parent. Return it. 161 return TBAANode(P); 162 } 163 164 /// TypeIsImmutable - Test if this TBAANode represents a type for objects 165 /// which are not modified (by any means) in the context where this 166 /// AliasAnalysis is relevant. 
167 bool TypeIsImmutable() const { 168 if (Node->getNumOperands() < 3) 169 return false; 170 ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2)); 171 if (!CI) 172 return false; 173 return CI->getValue()[0]; 174 } 175 }; 176 177 /// This is a simple wrapper around an MDNode which provides a 178 /// higher-level interface by hiding the details of how alias analysis 179 /// information is encoded in its operands. 180 class TBAAStructTagNode { 181 /// This node should be created with createTBAAStructTagNode. 182 const MDNode *Node; 183 184 public: 185 TBAAStructTagNode() : Node(0) {} 186 explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} 187 188 /// Get the MDNode for this TBAAStructTagNode. 189 const MDNode *getNode() const { return Node; } 190 191 const MDNode *getBaseType() const { 192 return dyn_cast_or_null<MDNode>(Node->getOperand(0)); 193 } 194 const MDNode *getAccessType() const { 195 return dyn_cast_or_null<MDNode>(Node->getOperand(1)); 196 } 197 uint64_t getOffset() const { 198 return cast<ConstantInt>(Node->getOperand(2))->getZExtValue(); 199 } 200 /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for 201 /// objects which are not modified (by any means) in the context where this 202 /// AliasAnalysis is relevant. 203 bool TypeIsImmutable() const { 204 if (Node->getNumOperands() < 4) 205 return false; 206 ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(3)); 207 if (!CI) 208 return false; 209 return CI->getValue()[0]; 210 } 211 }; 212 213 /// This is a simple wrapper around an MDNode which provides a 214 /// higher-level interface by hiding the details of how alias analysis 215 /// information is encoded in its operands. 216 class TBAAStructTypeNode { 217 /// This node should be created with createTBAAStructTypeNode. 218 const MDNode *Node; 219 220 public: 221 TBAAStructTypeNode() : Node(0) {} 222 explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} 223 224 /// Get the MDNode for this TBAAStructTypeNode. 
225 const MDNode *getNode() const { return Node; } 226 227 /// Get this TBAAStructTypeNode's field in the type DAG with 228 /// given offset. Update the offset to be relative to the field type. 229 TBAAStructTypeNode getParent(uint64_t &Offset) const { 230 // Parent can be omitted for the root node. 231 if (Node->getNumOperands() < 2) 232 return TBAAStructTypeNode(); 233 234 // Special handling for a scalar type node. 235 if (Node->getNumOperands() <= 3) { 236 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); 237 if (!P) 238 return TBAAStructTypeNode(); 239 return TBAAStructTypeNode(P); 240 } 241 242 // Assume the offsets are in order. We return the previous field if 243 // the current offset is bigger than the given offset. 244 unsigned TheIdx = 0; 245 for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { 246 uint64_t Cur = cast<ConstantInt>(Node->getOperand(Idx + 1))-> 247 getZExtValue(); 248 if (Cur > Offset) { 249 assert(Idx >= 3 && 250 "TBAAStructTypeNode::getParent should have an offset match!"); 251 TheIdx = Idx - 2; 252 break; 253 } 254 } 255 // Move along the last field. 256 if (TheIdx == 0) 257 TheIdx = Node->getNumOperands() - 2; 258 uint64_t Cur = cast<ConstantInt>(Node->getOperand(TheIdx + 1))-> 259 getZExtValue(); 260 Offset -= Cur; 261 MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); 262 if (!P) 263 return TBAAStructTypeNode(); 264 return TBAAStructTypeNode(P); 265 } 266 }; 267 } 268 269 namespace { 270 /// TypeBasedAliasAnalysis - This is a simple alias analysis 271 /// implementation that uses TypeBased to answer queries. 
272 class TypeBasedAliasAnalysis : public ImmutablePass, 273 public AliasAnalysis { 274 public: 275 static char ID; // Class identification, replacement for typeinfo 276 TypeBasedAliasAnalysis() : ImmutablePass(ID) { 277 initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); 278 } 279 280 virtual void initializePass() { 281 InitializeAliasAnalysis(this); 282 } 283 284 /// getAdjustedAnalysisPointer - This method is used when a pass implements 285 /// an analysis interface through multiple inheritance. If needed, it 286 /// should override this to adjust the this pointer as needed for the 287 /// specified pass info. 288 virtual void *getAdjustedAnalysisPointer(const void *PI) { 289 if (PI == &AliasAnalysis::ID) 290 return (AliasAnalysis*)this; 291 return this; 292 } 293 294 bool Aliases(const MDNode *A, const MDNode *B) const; 295 bool PathAliases(const MDNode *A, const MDNode *B) const; 296 297 private: 298 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 299 virtual AliasResult alias(const Location &LocA, const Location &LocB); 300 virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); 301 virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); 302 virtual ModRefBehavior getModRefBehavior(const Function *F); 303 virtual ModRefResult getModRefInfo(ImmutableCallSite CS, 304 const Location &Loc); 305 virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, 306 ImmutableCallSite CS2); 307 }; 308 } // End of anonymous namespace 309 310 // Register this pass... 
311 char TypeBasedAliasAnalysis::ID = 0; 312 INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", 313 "Type-Based Alias Analysis", false, true, false) 314 315 ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { 316 return new TypeBasedAliasAnalysis(); 317 } 318 319 void 320 TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 321 AU.setPreservesAll(); 322 AliasAnalysis::getAnalysisUsage(AU); 323 } 324 325 /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat 326 /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA 327 /// format. 328 static bool isStructPathTBAA(const MDNode *MD) { 329 // Anonymous TBAA root starts with a MDNode and dragonegg uses it as 330 // a TBAA tag. 331 return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3; 332 } 333 334 /// Aliases - Test whether the type represented by A may alias the 335 /// type represented by B. 336 bool 337 TypeBasedAliasAnalysis::Aliases(const MDNode *A, 338 const MDNode *B) const { 339 if (isStructPathTBAA(A)) 340 return PathAliases(A, B); 341 342 // Keep track of the root node for A and B. 343 TBAANode RootA, RootB; 344 345 // Climb the tree from A to see if we reach B. 346 for (TBAANode T(A); ; ) { 347 if (T.getNode() == B) 348 // B is an ancestor of A. 349 return true; 350 351 RootA = T; 352 T = T.getParent(); 353 if (!T.getNode()) 354 break; 355 } 356 357 // Climb the tree from B to see if we reach A. 358 for (TBAANode T(B); ; ) { 359 if (T.getNode() == A) 360 // A is an ancestor of B. 361 return true; 362 363 RootB = T; 364 T = T.getParent(); 365 if (!T.getNode()) 366 break; 367 } 368 369 // Neither node is an ancestor of the other. 370 371 // If they have different roots, they're part of different potentially 372 // unrelated type systems, so we must be conservative. 373 if (RootA.getNode() != RootB.getNode()) 374 return true; 375 376 // If they have the same root, then we've proved there's no alias. 
377 return false; 378 } 379 380 /// Test whether the struct-path tag represented by A may alias the 381 /// struct-path tag represented by B. 382 bool 383 TypeBasedAliasAnalysis::PathAliases(const MDNode *A, 384 const MDNode *B) const { 385 // Keep track of the root node for A and B. 386 TBAAStructTypeNode RootA, RootB; 387 TBAAStructTagNode TagA(A), TagB(B); 388 389 // TODO: We need to check if AccessType of TagA encloses AccessType of 390 // TagB to support aggregate AccessType. If yes, return true. 391 392 // Start from the base type of A, follow the edge with the correct offset in 393 // the type DAG and adjust the offset until we reach the base type of B or 394 // until we reach the Root node. 395 // Compare the adjusted offset once we have the same base. 396 397 // Climb the type DAG from base type of A to see if we reach base type of B. 398 const MDNode *BaseA = TagA.getBaseType(); 399 const MDNode *BaseB = TagB.getBaseType(); 400 uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); 401 for (TBAAStructTypeNode T(BaseA); ; ) { 402 if (T.getNode() == BaseB) 403 // Base type of A encloses base type of B, check if the offsets match. 404 return OffsetA == OffsetB; 405 406 RootA = T; 407 // Follow the edge with the correct offset, OffsetA will be adjusted to 408 // be relative to the field type. 409 T = T.getParent(OffsetA); 410 if (!T.getNode()) 411 break; 412 } 413 414 // Reset OffsetA and climb the type DAG from base type of B to see if we reach 415 // base type of A. 416 OffsetA = TagA.getOffset(); 417 for (TBAAStructTypeNode T(BaseB); ; ) { 418 if (T.getNode() == BaseA) 419 // Base type of B encloses base type of A, check if the offsets match. 420 return OffsetA == OffsetB; 421 422 RootB = T; 423 // Follow the edge with the correct offset, OffsetB will be adjusted to 424 // be relative to the field type. 425 T = T.getParent(OffsetB); 426 if (!T.getNode()) 427 break; 428 } 429 430 // Neither node is an ancestor of the other. 
431 432 // If they have different roots, they're part of different potentially 433 // unrelated type systems, so we must be conservative. 434 if (RootA.getNode() != RootB.getNode()) 435 return true; 436 437 // If they have the same root, then we've proved there's no alias. 438 return false; 439 } 440 441 AliasAnalysis::AliasResult 442 TypeBasedAliasAnalysis::alias(const Location &LocA, 443 const Location &LocB) { 444 if (!EnableTBAA) 445 return AliasAnalysis::alias(LocA, LocB); 446 447 // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must 448 // be conservative. 449 const MDNode *AM = LocA.TBAATag; 450 if (!AM) return AliasAnalysis::alias(LocA, LocB); 451 const MDNode *BM = LocB.TBAATag; 452 if (!BM) return AliasAnalysis::alias(LocA, LocB); 453 454 // If they may alias, chain to the next AliasAnalysis. 455 if (Aliases(AM, BM)) 456 return AliasAnalysis::alias(LocA, LocB); 457 458 // Otherwise return a definitive result. 459 return NoAlias; 460 } 461 462 bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, 463 bool OrLocal) { 464 if (!EnableTBAA) 465 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 466 467 const MDNode *M = Loc.TBAATag; 468 if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 469 470 // If this is an "immutable" type, we can assume the pointer is pointing 471 // to constant memory. 472 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || 473 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) 474 return true; 475 476 return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); 477 } 478 479 AliasAnalysis::ModRefBehavior 480 TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { 481 if (!EnableTBAA) 482 return AliasAnalysis::getModRefBehavior(CS); 483 484 ModRefBehavior Min = UnknownModRefBehavior; 485 486 // If this is an "immutable" type, we can assume the call doesn't write 487 // to memory. 
488 if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 489 if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || 490 (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) 491 Min = OnlyReadsMemory; 492 493 return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); 494 } 495 496 AliasAnalysis::ModRefBehavior 497 TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { 498 // Functions don't have metadata. Just chain to the next implementation. 499 return AliasAnalysis::getModRefBehavior(F); 500 } 501 502 AliasAnalysis::ModRefResult 503 TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, 504 const Location &Loc) { 505 if (!EnableTBAA) 506 return AliasAnalysis::getModRefInfo(CS, Loc); 507 508 if (const MDNode *L = Loc.TBAATag) 509 if (const MDNode *M = 510 CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 511 if (!Aliases(L, M)) 512 return NoModRef; 513 514 return AliasAnalysis::getModRefInfo(CS, Loc); 515 } 516 517 AliasAnalysis::ModRefResult 518 TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, 519 ImmutableCallSite CS2) { 520 if (!EnableTBAA) 521 return AliasAnalysis::getModRefInfo(CS1, CS2); 522 523 if (const MDNode *M1 = 524 CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 525 if (const MDNode *M2 = 526 CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) 527 if (!Aliases(M1, M2)) 528 return NoModRef; 529 530 return AliasAnalysis::getModRefInfo(CS1, CS2); 531 } 532 533 bool MDNode::isTBAAVtableAccess() const { 534 if (!isStructPathTBAA(this)) { 535 if (getNumOperands() < 1) return false; 536 if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { 537 if (Tag1->getString() == "vtable pointer") return true; 538 } 539 return false; 540 } 541 542 // For struct-path aware TBAA, we use the access type of the tag. 
543 if (getNumOperands() < 2) return false; 544 MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); 545 if (!Tag) return false; 546 if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { 547 if (Tag1->getString() == "vtable pointer") return true; 548 } 549 return false; 550 } 551 552 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { 553 if (!A || !B) 554 return NULL; 555 556 if (A == B) 557 return A; 558 559 // For struct-path aware TBAA, we use the access type of the tag. 560 bool StructPath = isStructPathTBAA(A); 561 if (StructPath) { 562 A = cast_or_null<MDNode>(A->getOperand(1)); 563 if (!A) return 0; 564 B = cast_or_null<MDNode>(B->getOperand(1)); 565 if (!B) return 0; 566 } 567 568 SmallVector<MDNode *, 4> PathA; 569 MDNode *T = A; 570 while (T) { 571 PathA.push_back(T); 572 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; 573 } 574 575 SmallVector<MDNode *, 4> PathB; 576 T = B; 577 while (T) { 578 PathB.push_back(T); 579 T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0; 580 } 581 582 int IA = PathA.size() - 1; 583 int IB = PathB.size() - 1; 584 585 MDNode *Ret = 0; 586 while (IA >= 0 && IB >=0) { 587 if (PathA[IA] == PathB[IB]) 588 Ret = PathA[IA]; 589 else 590 break; 591 --IA; 592 --IB; 593 } 594 if (!StructPath) 595 return Ret; 596 597 if (!Ret) 598 return 0; 599 // We need to convert from a type node to a tag node. 600 Type *Int64 = IntegerType::get(A->getContext(), 64); 601 Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) }; 602 return MDNode::get(A->getContext(), Ops); 603 } 604