//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass looks for equivalent functions that are mergeable and folds them.
//
// A hash is computed from the function, based on its type and number of
// basic blocks.
//
// Once all hashes are computed, we perform an expensive equality comparison
// on each function pair. This takes n^2/2 comparisons per bucket, so it's
// important that the hash function be high quality. The equality comparison
// iterates through each instruction in each basic block.
//
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
// leave two overridable thunks to it.
//
//===----------------------------------------------------------------------===//
//
// Future work:
//
// * virtual functions.
//
// Many functions have their address taken by the virtual function table for
// the object they belong to. However, as long as it's only used for a lookup
// and call, this is irrelevant, and we'd like to fold such functions.
//
// * switch from n^2 pair-wise comparisons to an n-way comparison for each
// bucket.
//
// * be smarter about bitcasts.
//
// In order to fold functions, we will sometimes add either bitcast instructions
// or bitcast constant expressions. Unfortunately, this can confound further
// analysis since the two functions differ where one has a bitcast and the
// other doesn't. We should learn to look through bitcasts.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;

#define DEBUG_TYPE "mergefunc"

// Pass-level statistics, reported with -stats.
STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");

/// Returns the type id for a type to be hashed. We turn pointer types into
/// integers here because the actual compare logic below considers pointers and
/// integers of the same size as equal.
static Type::TypeID getTypeIDForHash(Type *Ty) {
  // Coerce pointers to IntegerTyID so that functions differing only in
  // pointer-vs-same-size-integer signatures land in the same hash bucket.
  if (Ty->isPointerTy())
    return Type::IntegerTyID;
  return Ty->getTypeID();
}

/// Creates a hash-code for the function which is the same for any two
/// functions that will compare equal, without looking at the instructions
/// inside the function.
static unsigned profileFunction(const Function *F) {
  FunctionType *FTy = F->getFunctionType();

  FoldingSetNodeID ID;
  // Fold in the coarse "shape" of the function: basic-block count, calling
  // convention, GC usage, and the signature (with pointer types coerced to
  // integers via getTypeIDForHash). Instructions are deliberately ignored;
  // the expensive comparison happens later, per bucket.
  ID.AddInteger(F->size());
  ID.AddInteger(F->getCallingConv());
  ID.AddBoolean(F->hasGC());
  ID.AddBoolean(FTy->isVarArg());
  ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
    ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
  return ID.ComputeHash();
}

namespace {

/// ComparableFunction - A struct that pairs together functions with a
/// DataLayout so that we can keep them together as elements in the DenseSet.
class ComparableFunction {
public:
  // Sentinel keys for DenseMapInfo<ComparableFunction> (defined below).
  static const ComparableFunction EmptyKey;
  static const ComparableFunction TombstoneKey;
  // Sentinel DataLayout pointer; presumably marks lookup-only (non-inserting)
  // queries — its use is outside this chunk, verify at the call sites.
  static DataLayout * const LookupOnly;

  // The hash is computed once here and cached for the lifetime of the entry.
  ComparableFunction(Function *Func, const DataLayout *DL)
    : Func(Func), Hash(profileFunction(Func)), DL(DL) {}

  Function *getFunc() const { return Func; }
  unsigned getHash() const { return Hash; }
  const DataLayout *getDataLayout() const { return DL; }

  // Drops AssertingVH reference to the function. Outside of debug mode, this
  // does nothing.
  void release() {
    assert(Func &&
           "Attempted to release function twice, or release empty/tombstone!");
    Func = nullptr;
  }

private:
  // Builds the Empty/Tombstone sentinel keys: no function, no DataLayout,
  // just a distinguishing hash value (0 and 1 respectively, see below).
  explicit ComparableFunction(unsigned Hash)
    : Func(nullptr), Hash(Hash), DL(nullptr) {}

  // AssertingVH aborts (in +Asserts builds) if the Function is deleted while
  // this entry still references it; release() must be called first.
  AssertingVH<Function> Func;
  unsigned Hash;
  const DataLayout *DL;
};

const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
const ComparableFunction ComparableFunction::TombstoneKey =
    ComparableFunction(1);
// -1 is never a valid object address, so it makes a distinct sentinel.
DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1);

}

namespace llvm {
// DenseMapInfo specialization so a ComparableFunction can be used directly as
// a DenseSet/DenseMap key. isEqual is declared here but defined elsewhere in
// the file (outside this chunk).
template <>
struct DenseMapInfo<ComparableFunction> {
  static ComparableFunction getEmptyKey() {
    return ComparableFunction::EmptyKey;
  }
  static ComparableFunction getTombstoneKey() {
    return ComparableFunction::TombstoneKey;
  }
  static unsigned getHashValue(const ComparableFunction &CF) {
    return CF.getHash();
  }
  static bool isEqual(const ComparableFunction &LHS,
                      const ComparableFunction &RHS);
};
}

namespace {

/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. DataLayout is
/// used if available. The comparator always fails conservatively (erring on the
/// side of claiming that two functions are different).
class FunctionComparator {
public:
  FunctionComparator(const DataLayout *DL, const Function *F1,
                     const Function *F2)
      : F1(F1), F2(F2), DL(DL) {}

  /// Test whether the two functions have equivalent behaviour.
  bool compare();

private:
  /// Test whether two basic blocks have equivalent behaviour.
  bool compare(const BasicBlock *BB1, const BasicBlock *BB2);

  /// Constants comparison.
  /// It is analogous to a lexicographical comparison between hypothetical
  /// numbers of the following format:
  /// <bitcastability-trait><raw-bit-contents>
  ///
  /// 1. Bitcastability.
  /// Check whether L's type could be losslessly bitcasted to R's type.
  /// At this stage, if a lossless bitcast is not possible, the method
  /// returns -1 or 1, thus also defining which type is greater in the
  /// context of bitcastability.
  /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
  ///          to the contents comparison.
  ///          If types differ, remember the types comparison result and check
  ///          whether we can still bitcast the types.
  /// Stage 1: Types that satisfy the isFirstClassType condition are always
  ///          greater than others.
  /// Stage 2: A vector is greater than a non-vector.
  ///          If both types are vectors, then the vector with the greater
  ///          bitwidth is greater.
  ///          If both types are vectors with the same bitwidth, then the
  ///          types are bitcastable, and we can skip the other stages and go
  ///          to the contents comparison.
  /// Stage 3: Pointer types are greater than non-pointers. If both types are
  ///          pointers of the same address space - go to the contents
  ///          comparison. Different address spaces: the pointer with the
  ///          greater address space is greater.
  /// Stage 4: The types are neither vectors nor pointers, and they differ.
  ///          We don't know how to bitcast them, so we'd better not do it,
  ///          and we return the types comparison result (so it determines the
  ///          relationship among constants we don't know how to bitcast).
  ///
  /// Just for clarity, let's see how the set of constants could look
  /// on a single dimension axis:
  ///
  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
  /// Where: NFCT - Not a FirstClassType
  ///        FCT - FirstClassType
  ///
  /// 2. Compare raw contents.
  /// This stage ignores types and only compares bits from L and R.
  /// Returns 0 if L and R have equivalent contents,
  /// -1 or 1 if the values are different.
  /// Pretty trivial:
  /// 2.1. If the contents are numbers, compare the numbers.
  ///      Ints with a greater bitwidth are greater. Ints with the same
  ///      bitwidth are compared by their contents.
  /// 2.2. "And so on". To avoid discrepancies with these comments, it is
  ///      perhaps better to read the implementation itself.
  /// 3. And again about the overall picture. Let's look back at how the
  /// ordered set of constants will look:
  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
  ///
  /// Now look at what could be inside [FCT, "others"], for example:
  /// [FCT, "others"] =
  /// [
  ///   [double 0.1], [double 1.23],
  ///   [i32 1], [i32 2],
  ///   { double 1.0 },      ; StructTyID, NumElements = 1
  ///   { i32 1 },           ; StructTyID, NumElements = 1
  ///   { double 1, i32 1 }, ; StructTyID, NumElements = 2
  ///   { i32 1, double 1 }  ; StructTyID, NumElements = 2
  /// ]
  ///
  /// Let's explain the order. Float numbers will be less than integers,
  /// simply because of cmpType terms: FloatTyID < IntegerTyID.
  /// Floats (with the same fltSemantics) are sorted according to their value.
  /// Then come the integers, which, like the floats, can easily be sorted
  /// among each other.
  /// Then the structures. Structures are grouped at the tail, again because
  /// of their TypeID: StructTyID > IntegerTyID > FloatTyID.
  /// Structures with a greater number of elements are greater. Structures
  /// whose greater elements come first are greater.
  /// The same logic applies to vectors, arrays and other possible complex
  /// types.
  ///
  /// Bitcastable constants.
  /// Let's assume that some constant belongs to some group of
  /// "so-called-equal" values with different types, and at the same time
  /// belongs to another group of constants with equal types
  /// and "really" equal values.
  ///
  /// Now, prove that this is impossible:
  ///
  /// If constant A with type TyA is bitcastable to B with type TyB, then:
  /// 1. All constants with types equal to TyA are bitcastable to B. Since
  ///    those should be vectors (if TyA is a vector), pointers
  ///    (if TyA is a pointer), or else (if TyA equals TyB), those types
  ///    should be equal to TyB.
  /// 2. All constants with non-equal, but bitcastable, types to TyA are
  ///    bitcastable to B.
  ///    Once again, just because we allow this for vectors and pointers only.
  ///    This statement can be expanded as below:
  /// 2.1. All vectors with a bitwidth equal to vector A have a bitwidth equal
  ///      to vector B, and are thus bitcastable to B as well.
  /// 2.2. All pointers of the same address space, no matter what they point
  ///      to, are bitcastable. So if C is a pointer, it can be bitcasted to A
  ///      and to B.
  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
  /// QED.
  ///
  /// In other words, for pointers and vectors, we ignore the top-level type
  /// and look at their particular properties (bit-width for vectors, and
  /// address space for pointers).
  /// If these properties are equal - compare their contents.
  int cmpConstants(const Constant *L, const Constant *R);

  /// Assign or look up previously assigned numbers for the two values, and
  /// return whether the numbers are equal. Numbers are assigned in the order
  /// visited.
  /// Comparison order:
  /// Stage 0: A value that is the function itself is always greater than
  ///          others. If the left and right values are references to their
  ///          respective functions, then they are equal.
  /// Stage 1: Constants are greater than non-constants.
  ///          If both left and right are constants, then the result of
  ///          cmpConstants is used as the cmpValues result.
  /// Stage 2: InlineAsm instances are greater than others. If both left and
  ///          right are InlineAsm instances, the InlineAsm* pointers are
  ///          cast to integers and compared as numbers.
  /// Stage 3: For all other cases we compare the order in which we meet
  ///          these values in their functions. If the right value was met
  ///          first during scanning, then the left value is greater.
  ///          In other words, we compare serial numbers; for more details
  ///          see the comments for sn_mapL and sn_mapR.
  int cmpValues(const Value *L, const Value *R);

  // Convenience wrapper: true iff V1 and V2 compare equal under cmpValues.
  bool enumerate(const Value *V1, const Value *V2) {
    return cmpValues(V1, V2) == 0;
  }

  /// Compare two Instructions for equivalence, similar to
  /// Instruction::isSameOperationAs but with modifications to the type
  /// comparison.
  /// Stages are listed in "most significant stage first" order:
  /// At each stage below, we do a comparison between some left and right
  /// operation parts. If the parts are non-equal, we assign the parts'
  /// comparison result to the operation comparison result and exit from the
  /// method.
  /// Otherwise we proceed to the next stage.
  /// Stages:
  /// 1. Operation opcodes. Compared as numbers.
  /// 2. Number of operands.
  /// 3. Operation types. Compared with the cmpType method.
  /// 4. Compare operation subclass optional data as a stream of bytes:
  ///    just convert it to integers and call cmpNumbers.
  /// 5. Compare the operation's operand types with cmpType in
  ///    most significant operand first order.
  /// 6. Last stage. Check operations for some specific attributes.
  ///    For example, for Load it would be:
  ///    6.1. Load: volatile (as a boolean flag)
  ///    6.2. Load: alignment (as integer numbers)
  ///    6.3. Load: synch-scope (as integer numbers)
  ///    At this stage it is better to see the code, since it is no more than
  ///    10-15 lines for a particular instruction, and could change sometimes.
  int cmpOperation(const Instruction *L, const Instruction *R) const;

  // Convenience wrapper: true iff I1 and I2 compare equal under cmpOperation.
  bool isEquivalentOperation(const Instruction *I1,
                             const Instruction *I2) const {
    return cmpOperation(I1, I2) == 0;
  }

  /// Compare two GEPs for equivalent pointer arithmetic.
  /// Parts to be compared for each comparison stage,
  /// most significant stage first:
  /// 1. Address space. As numbers.
  /// 2. Constant offset (if the "DataLayout *DL" field is not NULL,
  ///    using the GEPOperator::accumulateConstantOffset method).
  /// 3. Pointer operand type (using the cmpType method).
  /// 4. Number of operands.
  /// 5. Compare operands, using the cmpValues method.
  int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
  int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
    return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
  }

  // Convenience wrappers: true iff the two GEPs compare equal under cmpGEP.
  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) {
    return cmpGEP(GEP1, GEP2) == 0;
  }
  bool isEquivalentGEP(const GetElementPtrInst *GEP1,
                       const GetElementPtrInst *GEP2) {
    return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
  }

  /// cmpType - compares two types,
  /// defines a total ordering among the set of types.
  ///
  /// Return values:
  /// 0 if types are equal,
  /// -1 if Left is less than Right,
  /// +1 if Left is greater than Right.
  ///
  /// Description:
  /// The comparison is broken into stages. As in a lexicographical
  /// comparison, a stage coming earlier has higher priority.
  /// At each explanation stage keep in mind the total ordering properties.
  ///
  /// 0. Before the comparison we coerce pointer types of address space 0 to
  /// integer.
  /// We also don't bother with identical types on the left and right, and
  /// just return 0 in this case.
  ///
  /// 1. If the types are of different kinds (different type IDs),
  ///    return the result of the type IDs comparison, treating them as
  ///    numbers.
  /// 2. If the types are vectors or integers, compare the Type* values as
  ///    numbers.
  /// 3. The types have the same ID, so check whether they belong to the
  ///    following group:
  /// * Void
  /// * Float
  /// * Double
  /// * X86_FP80
  /// * FP128
  /// * PPC_FP128
  /// * Label
  /// * Metadata
  /// If so - return 0. Yes - we can treat these types as equal purely
  /// because their IDs are the same.
  /// 4. If Left and Right are pointers, return the result of the address
  ///    space comparison (numbers comparison). We can treat pointer types
  ///    of the same address space as equal.
  /// 5. If the types are complex,
  ///    then both Left and Right are expanded and their element types are
  ///    checked in the same way. If we get Res != 0 at some stage, return
  ///    it. Otherwise return 0.
  /// 6. For all other cases put llvm_unreachable.
  int cmpType(Type *TyL, Type *TyR) const;

  // Convenience wrapper: true iff Ty1 and Ty2 compare equal under cmpType.
  bool isEquivalentType(Type *Ty1, Type *Ty2) const {
    return cmpType(Ty1, Ty2) == 0;
  }

  // Three-way orderings for the primitive pieces the comparisons above are
  // built from; each returns -1, 0 or +1.
  int cmpNumbers(uint64_t L, uint64_t R) const;

  int cmpAPInt(const APInt &L, const APInt &R) const;
  int cmpAPFloat(const APFloat &L, const APFloat &R) const;
  int cmpStrings(StringRef L, StringRef R) const;
  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;

  // The two functions undergoing comparison.
  const Function *F1, *F2;

  const DataLayout *DL;

  /// Assign serial numbers to values from the left function, and to values
  /// from the right function.
  /// Explanation:
  /// When comparing functions we need to compare the values we meet on the
  /// left and right sides.
  /// It is easy to sort things out for external values: they just have to be
  /// the same value on the left and on the right.
  /// But for local values (those introduced inside the function body) we
  /// have to ensure they were introduced at exactly the same place, and play
  /// the same role.
  /// So let's assign a serial number to each value when we meet it for the
  /// first time. Values that were met at the same place will have the same
  /// serial numbers.
  /// In this case it would be good to explain a few points about the values
  /// assigned to BBs and other implementation alternatives (see below).
  ///
  /// 1. Safety of BB reordering.
  /// It's safe to change the order of BasicBlocks in a function.
  /// The relationship with other functions and the serial numbering will not
  /// change in this case.
  /// As follows from FunctionComparator::compare(), we do a CFG walk: we
  /// start from the entry and then take each terminator. So it doesn't
  /// matter how the BBs are in fact ordered in the function. And since
  /// cmpValues is called during this walk, the numbering depends only on
  /// where the BBs are located inside the CFG.
  /// So the answer is - yes. We will get the same numbering.
  ///
  /// 2. Impossibility of using the dominance properties of values.
  /// If we compare two instruction operands - the first being the usage of a
  /// local variable AL from function FL, and the second the usage of a local
  /// variable AR from FR - we could compare their origins and check whether
  /// they are defined at the same place.
  /// But we are still unable to compare the operands of PHI nodes, since
  /// those could be operands from further BBs we haven't scanned yet.
  /// So it's impossible to use dominance properties in general.
450 DenseMap<const Value*, int> sn_mapL, sn_mapR; 451 }; 452 453 } 454 455 int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { 456 if (L < R) return -1; 457 if (L > R) return 1; 458 return 0; 459 } 460 461 int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const { 462 if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth())) 463 return Res; 464 if (L.ugt(R)) return 1; 465 if (R.ugt(L)) return -1; 466 return 0; 467 } 468 469 int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const { 470 if (int Res = cmpNumbers((uint64_t)&L.getSemantics(), 471 (uint64_t)&R.getSemantics())) 472 return Res; 473 return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt()); 474 } 475 476 int FunctionComparator::cmpStrings(StringRef L, StringRef R) const { 477 // Prevent heavy comparison, compare sizes first. 478 if (int Res = cmpNumbers(L.size(), R.size())) 479 return Res; 480 481 // Compare strings lexicographically only when it is necessary: only when 482 // strings are equal in size. 483 return L.compare(R); 484 } 485 486 int FunctionComparator::cmpAttrs(const AttributeSet L, 487 const AttributeSet R) const { 488 if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) 489 return Res; 490 491 for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { 492 AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), 493 RE = R.end(i); 494 for (; LI != LE && RI != RE; ++LI, ++RI) { 495 Attribute LA = *LI; 496 Attribute RA = *RI; 497 if (LA < RA) 498 return -1; 499 if (RA < LA) 500 return 1; 501 } 502 if (LI != LE) 503 return 1; 504 if (RI != RE) 505 return -1; 506 } 507 return 0; 508 } 509 510 /// Constants comparison: 511 /// 1. Check whether type of L constant could be losslessly bitcasted to R 512 /// type. 513 /// 2. Compare constant contents. 514 /// For more details see declaration comments. 
int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {

  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether types are bitcastable. This part is just re-factored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // we also pack into result which type is "less" for us.
  int TypesRes = cmpType(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;
      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);

    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      // Exactly one side is a pointer: pointers order after non-pointers.
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.

  // Null values first: two nulls compare by type; a null orders after a
  // non-null of a bitcastable type.
  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;

  // Same ValueID on both sides: dispatch on the concrete constant kind and
  // compare contents, recursing element-wise for aggregates.
  switch (L->getValueID()) {
  case Value::UndefValueVal: return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInt(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloat(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::FunctionVal:
  case Value::GlobalVariableVal:
  case Value::GlobalAliasVal:
  default: // Unknown constant, cast L and R pointers to numbers and compare.
    // NOTE(review): ordering by raw pointer value is stable within one run
    // but not deterministic across runs — confirm callers rely only on the
    // equality (== 0) part of this result.
    return cmpNumbers((uint64_t)L, (uint64_t)R);
  }
}

/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {

  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  // With target data available, address-space-0 pointers are coerced to the
  // target's pointer-sized integer type before comparison.
  if (DL) {
    if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
    if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
  }

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
    // Fall through in Release mode.
  case Type::IntegerTyID:
  case Type::VectorTyID:
    // TyL == TyR would have returned true earlier.
    // NOTE(review): Type objects are uniqued per context, so equality here is
    // sound, but the < / > ordering is by address and thus not deterministic
    // across runs — confirm callers rely only on equality.
    return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);

  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
    // Singleton types: the matching TypeID alone proves equality.
    return 0;

  case Type::PointerTyID: {
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
  }

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpType(STyL->getElementType(i),
                            STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID: {
    ArrayType *ATyL = cast<ArrayType>(TyL);
    ArrayType *ATyR = cast<ArrayType>(TyR);
    if (ATyL->getNumElements() != ATyR->getNumElements())
      return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
    return cmpType(ATyL->getElementType(), ATyR->getElementType());
  }
  }
}

// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
int FunctionComparator::cmpOperation(const Instruction *L,
                                     const Instruction *R) const {
  // Differences from Instruction::isSameOperationAs:
  // * replace type comparison with calls to isEquivalentType.
  // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
  // * because of the above, we don't test for the tail bit on calls later on
  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
    return Res;

  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;

  if (int Res = cmpType(L->getType(), R->getType()))
    return Res;

  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
                           R->getRawSubclassOptionalData()))
    return Res;

  // We have two instructions of identical opcode and #operands. Check to see
  // if all operands are the same type
  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
    if (int Res =
            cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
      return Res;
  }

  // Check special state that is a part of some instructions.
  // From here on, L's concrete class is known, so R can be cast<> to the
  // same class: the opcode comparison above already matched them.
  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope());
  }
  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
    if (int Res =
            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
  }
  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
  if (const CallInst *CI = dyn_cast<CallInst>(L)) {
    if (int Res = cmpNumbers(CI->getCallingConv(),
                             cast<CallInst>(R)->getCallingConv()))
      return Res;
    return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
  }
  if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
    if (int Res = cmpNumbers(CI->getCallingConv(),
                             cast<InvokeInst>(R)->getCallingConv()))
      return Res;
    return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
  }
  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
    ArrayRef<unsigned> LIndices = IVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
    ArrayRef<unsigned> LIndices = EVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
    if (int Res =
            cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
  }

  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
    if (int Res = cmpNumbers(CXI->isVolatile(),
                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpNumbers(CXI->isWeak(),
                             cast<AtomicCmpXchgInst>(R)->isWeak()))
      return Res;
    if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
                             cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
      return Res;
    if (int Res = cmpNumbers(CXI->getFailureOrdering(),
                             cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
      return Res;
    return cmpNumbers(CXI->getSynchScope(),
                      cast<AtomicCmpXchgInst>(R)->getSynchScope());
  }
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
    if (int Res = cmpNumbers(RMWI->getOperation(),
                             cast<AtomicRMWInst>(R)->getOperation()))
      return Res;
    if (int Res = cmpNumbers(RMWI->isVolatile(),
                             cast<AtomicRMWInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpNumbers(RMWI->getOrdering(),
                             cast<AtomicRMWInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(RMWI->getSynchScope(),
                      cast<AtomicRMWInst>(R)->getSynchScope());
  }
  return 0;
}

// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
                               const GEPOperator *GEPR) {

  unsigned int ASL = GEPL->getPointerAddressSpace();
  unsigned int ASR = GEPR->getPointerAddressSpace();

  if (int Res = cmpNumbers(ASL, ASR))
    return Res;

  // When we have target data, we can reduce the GEP down to the value in bytes
  // added to the address.
  if (DL) {
    unsigned BitWidth = DL->getPointerSizeInBits(ASL);
    APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
    if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
        GEPR->accumulateConstantOffset(*DL, OffsetR))
      return cmpAPInt(OffsetL, OffsetR);
  }

  // NOTE(review): this compares the Type* pointer values themselves, so the
  // ordering it induces is not deterministic across runs; presumably only
  // equality/inequality matters to callers — confirm before relying on the
  // order.
  if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
                           (uint64_t)GEPR->getPointerOperand()->getType()))
    return Res;

  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
    return Res;

  // Same operand count; compare the pointer operand and indices pair-wise.
  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
      return Res;
  }

  return 0;
}

/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
/// See comments in declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) {
  // Catch self-reference case.
  if (L == F1) {
    if (R == F2)
      return 0;
    return -1;
  }
  if (R == F2) {
    if (L == F1)
      return 0;
    return 1;
  }

  const Constant *ConstL = dyn_cast<Constant>(L);
  const Constant *ConstR = dyn_cast<Constant>(R);
  if (ConstL && ConstR) {
    // Identical constants (same Value*) are trivially equal; otherwise defer
    // to the structural constant comparison.
    if (L == R)
      return 0;
    return cmpConstants(ConstL, ConstR);
  }

  // A constant on only one side orders constants after non-constants.
  if (ConstL)
    return 1;
  if (ConstR)
    return -1;

  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);

  // NOTE(review): inline asm is compared by object address, so two distinct
  // but textually identical asm blobs compare unequal, and the ordering is
  // not deterministic across runs.
  if (InlineAsmL && InlineAsmR)
    return cmpNumbers((uint64_t)L, (uint64_t)R);
  if (InlineAsmL)
    return 1;
  if (InlineAsmR)
    return -1;

  // Assign each value a serial number on first sight (insert is a no-op that
  // returns the existing entry when the value was seen before). Two functions
  // are equivalent only if corresponding values were first seen at the same
  // point, i.e. their serial numbers match.
  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));

  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
// Test whether two basic blocks have equivalent behaviour.
bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
  BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
  BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();

  // Walk both blocks in lockstep, pairing up instruction i of BB1 with
  // instruction i of BB2.
  do {
    if (!enumerate(F1I, F2I))
      return false;

    // GEPs get special handling so that GEPs computing the same address with
    // different index lists can still match.
    if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
      const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
      if (!GEP2)
        return false;

      if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
        return false;

      if (!isEquivalentGEP(GEP1, GEP2))
        return false;
    } else {
      if (!isEquivalentOperation(F1I, F2I))
        return false;

      assert(F1I->getNumOperands() == F2I->getNumOperands());
      for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
        Value *OpF1 = F1I->getOperand(i);
        Value *OpF2 = F2I->getOperand(i);

        if (!enumerate(OpF1, OpF2))
          return false;

        if (OpF1->getValueID() != OpF2->getValueID() ||
            !isEquivalentType(OpF1->getType(), OpF2->getType()))
          return false;
      }
    }

    ++F1I, ++F2I;
  } while (F1I != F1E && F2I != F2E);

  // Equivalent only if both blocks ran out of instructions simultaneously.
  return F1I == F1E && F2I == F2E;
}

// Test whether the two functions have equivalent behaviour.
bool FunctionComparator::compare() {
  // We need to recheck everything, but check the things that weren't included
  // in the hash first.

  sn_mapL.clear();
  sn_mapR.clear();

  if (F1->getAttributes() != F2->getAttributes())
    return false;

  if (F1->hasGC() != F2->hasGC())
    return false;

  if (F1->hasGC() && F1->getGC() != F2->getGC())
    return false;

  if (F1->hasSection() != F2->hasSection())
    return false;

  if (F1->hasSection() && F1->getSection() != F2->getSection())
    return false;

  if (F1->isVarArg() != F2->isVarArg())
    return false;

  // TODO: if it's internal and only used in direct calls, we could handle this
  // case too.
  if (F1->getCallingConv() != F2->getCallingConv())
    return false;

  if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
    return false;

  assert(F1->arg_size() == F2->arg_size() &&
         "Identically typed functions have different numbers of args!");

  // Visit the arguments so that they get enumerated in the order they're
  // passed in.
  for (Function::const_arg_iterator f1i = F1->arg_begin(),
         f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
    if (!enumerate(f1i, f2i))
      llvm_unreachable("Arguments repeat!");
  }

  // We do a CFG-ordered walk since the actual ordering of the blocks in the
  // linked list is immaterial. Our walk starts at the entry block for both
  // functions, then takes each block from each terminator in order. As an
  // artifact, this also means that unreachable blocks are ignored.
  SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.

  F1BBs.push_back(&F1->getEntryBlock());
  F2BBs.push_back(&F2->getEntryBlock());

  VisitedBBs.insert(F1BBs[0]);
  while (!F1BBs.empty()) {
    const BasicBlock *F1BB = F1BBs.pop_back_val();
    const BasicBlock *F2BB = F2BBs.pop_back_val();

    if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
      return false;

    const TerminatorInst *F1TI = F1BB->getTerminator();
    const TerminatorInst *F2TI = F2BB->getTerminator();

    // Both blocks compared equal, so their terminators have the same number
    // of successors; push unvisited successor pairs in matching order.
    assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
    for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
        continue;

      F1BBs.push_back(F1TI->getSuccessor(i));
      F2BBs.push_back(F2TI->getSuccessor(i));
    }
  }
  return true;
}

namespace {

/// MergeFunctions finds functions which will generate identical machine code,
/// by considering all pointer types to be equivalent. Once identified,
/// MergeFunctions will fold them by replacing a call to one to a call to a
/// bitcast of the other.
///
class MergeFunctions : public ModulePass {
public:
  static char ID;
  MergeFunctions()
    : ModulePass(ID), HasGlobalAliases(false) {
    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;

private:
  typedef DenseSet<ComparableFunction> FnSetType;

  /// A work queue of functions that may have been modified and should be
  /// analyzed again.
  std::vector<WeakVH> Deferred;

  /// Insert a ComparableFunction into the FnSet, or merge it away if it's
  /// equal to one that's already present.
  bool insert(ComparableFunction &NewF);

  /// Remove a Function from the FnSet and queue it up for a second sweep of
  /// analysis.
  void remove(Function *F);

  /// Find the functions that use this Value and remove them from FnSet and
  /// queue the functions.
  void removeUsers(Value *V);

  /// Replace all direct calls of Old with calls of New. Will bitcast New if
  /// necessary to make types match.
  void replaceDirectCallers(Function *Old, Function *New);

  /// Merge two equivalent functions. Upon completion, G may be deleted, or may
  /// be converted into a thunk. In either case, it should never be visited
  /// again.
  void mergeTwoFunctions(Function *F, Function *G);

  /// Replace G with a thunk or an alias to F. Deletes G.
  void writeThunkOrAlias(Function *F, Function *G);

  /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
  /// of G with bitcast(F). Deletes G.
  void writeThunk(Function *F, Function *G);

  /// Replace G with an alias to F. Deletes G.
  void writeAlias(Function *F, Function *G);

  /// The set of all distinct functions. Use the insert() and remove() methods
  /// to modify it.
  FnSetType FnSet;

  /// DataLayout for more accurate GEP comparisons. May be NULL.
  const DataLayout *DL;

  /// Whether or not the target supports global aliases.
  bool HasGlobalAliases;
};

} // end anonymous namespace

char MergeFunctions::ID = 0;
INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)

ModulePass *llvm::createMergeFunctionsPass() {
  return new MergeFunctions();
}

bool MergeFunctions::runOnModule(Module &M) {
  bool Changed = false;
  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  DL = DLP ? &DLP->getDataLayout() : nullptr;

  // Seed the worklist with every function that has a body we may merge.
  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
      Deferred.push_back(WeakVH(I));
  }
  FnSet.resize(Deferred.size());

  // Iterate to a fixed point: merging can invalidate functions, which are
  // re-queued on Deferred (via remove()) for another sweep.
  do {
    std::vector<WeakVH> Worklist;
    Deferred.swap(Worklist);

    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
    DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');

    // Insert only strong functions and merge them. Strong function merging
    // always deletes one of them.
    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
           E = Worklist.end(); I != E; ++I) {
      if (!*I) continue; // WeakVH nulled out: the function was deleted.
      Function *F = cast<Function>(*I);
      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
          !F->mayBeOverridden()) {
        ComparableFunction CF = ComparableFunction(F, DL);
        Changed |= insert(CF);
      }
    }

    // Insert only weak functions and merge them. By doing these second we
    // create thunks to the strong function when possible. When two weak
    // functions are identical, we create a new strong function with two weak
    // thunks to it which are identical but not mergable.
    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
           E = Worklist.end(); I != E; ++I) {
      if (!*I) continue; // WeakVH nulled out: the function was deleted.
      Function *F = cast<Function>(*I);
      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
          F->mayBeOverridden()) {
        ComparableFunction CF = ComparableFunction(F, DL);
        Changed |= insert(CF);
      }
    }
    DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
  } while (!Deferred.empty());

  FnSet.clear();

  return Changed;
}

bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
                                               const ComparableFunction &RHS) {
  if (LHS.getFunc() == RHS.getFunc() &&
      LHS.getHash() == RHS.getHash())
    return true;
  // Empty/tombstone keys have a null function; they only equal themselves,
  // which was handled above.
  if (!LHS.getFunc() || !RHS.getFunc())
    return false;

  // One of these is a special "underlying pointer comparison only" object.
  if (LHS.getDataLayout() == ComparableFunction::LookupOnly ||
      RHS.getDataLayout() == ComparableFunction::LookupOnly)
    return false;

  assert(LHS.getDataLayout() == RHS.getDataLayout() &&
         "Comparing functions for different targets");

  // Full structural comparison is the expensive last resort.
  return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(),
                            RHS.getFunc()).compare();
}

// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
  Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
  // Advance the iterator before mutating the use so the rewrite cannot
  // invalidate the position we continue from.
  for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
    Use *U = &*UI;
    ++UI;
    CallSite CS(U->getUser());
    if (CS && CS.isCallee(U)) {
      // The caller is about to change; re-queue it for analysis before
      // rewriting the callee operand.
      remove(CS.getInstruction()->getParent()->getParent());
      U->set(BitcastNew);
    }
  }
}

// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
1243 void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) { 1244 if (HasGlobalAliases && G->hasUnnamedAddr()) { 1245 if (G->hasExternalLinkage() || G->hasLocalLinkage() || 1246 G->hasWeakLinkage()) { 1247 writeAlias(F, G); 1248 return; 1249 } 1250 } 1251 1252 writeThunk(F, G); 1253 } 1254 1255 // Helper for writeThunk, 1256 // Selects proper bitcast operation, 1257 // but a bit simpler then CastInst::getCastOpcode. 1258 static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) { 1259 Type *SrcTy = V->getType(); 1260 if (SrcTy->isStructTy()) { 1261 assert(DestTy->isStructTy()); 1262 assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements()); 1263 Value *Result = UndefValue::get(DestTy); 1264 for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) { 1265 Value *Element = createCast( 1266 Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)), 1267 DestTy->getStructElementType(I)); 1268 1269 Result = 1270 Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I)); 1271 } 1272 return Result; 1273 } 1274 assert(!DestTy->isStructTy()); 1275 if (SrcTy->isIntegerTy() && DestTy->isPointerTy()) 1276 return Builder.CreateIntToPtr(V, DestTy); 1277 else if (SrcTy->isPointerTy() && DestTy->isIntegerTy()) 1278 return Builder.CreatePtrToInt(V, DestTy); 1279 else 1280 return Builder.CreateBitCast(V, DestTy); 1281 } 1282 1283 // Replace G with a simple tail call to bitcast(F). Also replace direct uses 1284 // of G with bitcast(F). Deletes G. 1285 void MergeFunctions::writeThunk(Function *F, Function *G) { 1286 if (!G->mayBeOverridden()) { 1287 // Redirect direct callers of G to F. 1288 replaceDirectCallers(G, F); 1289 } 1290 1291 // If G was internal then we may have replaced all uses of G with F. If so, 1292 // stop here and delete G. There's no need for a thunk. 
1293 if (G->hasLocalLinkage() && G->use_empty()) { 1294 G->eraseFromParent(); 1295 return; 1296 } 1297 1298 Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", 1299 G->getParent()); 1300 BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG); 1301 IRBuilder<false> Builder(BB); 1302 1303 SmallVector<Value *, 16> Args; 1304 unsigned i = 0; 1305 FunctionType *FFTy = F->getFunctionType(); 1306 for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); 1307 AI != AE; ++AI) { 1308 Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i))); 1309 ++i; 1310 } 1311 1312 CallInst *CI = Builder.CreateCall(F, Args); 1313 CI->setTailCall(); 1314 CI->setCallingConv(F->getCallingConv()); 1315 if (NewG->getReturnType()->isVoidTy()) { 1316 Builder.CreateRetVoid(); 1317 } else { 1318 Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType())); 1319 } 1320 1321 NewG->copyAttributesFrom(G); 1322 NewG->takeName(G); 1323 removeUsers(G); 1324 G->replaceAllUsesWith(NewG); 1325 G->eraseFromParent(); 1326 1327 DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n'); 1328 ++NumThunksWritten; 1329 } 1330 1331 // Replace G with an alias to F and delete G. 1332 void MergeFunctions::writeAlias(Function *F, Function *G) { 1333 PointerType *PTy = G->getType(); 1334 auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), 1335 G->getLinkage(), "", F); 1336 F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); 1337 GA->takeName(G); 1338 GA->setVisibility(G->getVisibility()); 1339 removeUsers(G); 1340 G->replaceAllUsesWith(GA); 1341 G->eraseFromParent(); 1342 1343 DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n'); 1344 ++NumAliasesWritten; 1345 } 1346 1347 // Merge two equivalent functions. Upon completion, Function G is deleted. 
1348 void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { 1349 if (F->mayBeOverridden()) { 1350 assert(G->mayBeOverridden()); 1351 1352 if (HasGlobalAliases) { 1353 // Make them both thunks to the same internal function. 1354 Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", 1355 F->getParent()); 1356 H->copyAttributesFrom(F); 1357 H->takeName(F); 1358 removeUsers(F); 1359 F->replaceAllUsesWith(H); 1360 1361 unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment()); 1362 1363 writeAlias(F, G); 1364 writeAlias(F, H); 1365 1366 F->setAlignment(MaxAlignment); 1367 F->setLinkage(GlobalValue::PrivateLinkage); 1368 } else { 1369 // We can't merge them. Instead, pick one and update all direct callers 1370 // to call it and hope that we improve the instruction cache hit rate. 1371 replaceDirectCallers(G, F); 1372 } 1373 1374 ++NumDoubleWeak; 1375 } else { 1376 writeThunkOrAlias(F, G); 1377 } 1378 1379 ++NumFunctionsMerged; 1380 } 1381 1382 // Insert a ComparableFunction into the FnSet, or merge it away if equal to one 1383 // that was already inserted. 1384 bool MergeFunctions::insert(ComparableFunction &NewF) { 1385 std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); 1386 if (Result.second) { 1387 DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); 1388 return false; 1389 } 1390 1391 const ComparableFunction &OldF = *Result.first; 1392 1393 // Don't merge tiny functions, since it can just end up making the function 1394 // larger. 1395 // FIXME: Should still merge them if they are unnamed_addr and produce an 1396 // alias. 1397 if (NewF.getFunc()->size() == 1) { 1398 if (NewF.getFunc()->front().size() <= 2) { 1399 DEBUG(dbgs() << NewF.getFunc()->getName() 1400 << " is to small to bother merging\n"); 1401 return false; 1402 } 1403 } 1404 1405 // Never thunk a strong function to a weak function. 
1406 assert(!OldF.getFunc()->mayBeOverridden() || 1407 NewF.getFunc()->mayBeOverridden()); 1408 1409 DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " 1410 << NewF.getFunc()->getName() << '\n'); 1411 1412 Function *DeleteF = NewF.getFunc(); 1413 NewF.release(); 1414 mergeTwoFunctions(OldF.getFunc(), DeleteF); 1415 return true; 1416 } 1417 1418 // Remove a function from FnSet. If it was already in FnSet, add it to Deferred 1419 // so that we'll look at it in the next round. 1420 void MergeFunctions::remove(Function *F) { 1421 // We need to make sure we remove F, not a function "equal" to F per the 1422 // function equality comparator. 1423 // 1424 // The special "lookup only" ComparableFunction bypasses the expensive 1425 // function comparison in favour of a pointer comparison on the underlying 1426 // Function*'s. 1427 ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); 1428 if (FnSet.erase(CF)) { 1429 DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); 1430 Deferred.push_back(F); 1431 } 1432 } 1433 1434 // For each instruction used by the value, remove() the function that contains 1435 // the instruction. This should happen right before a call to RAUW. 1436 void MergeFunctions::removeUsers(Value *V) { 1437 std::vector<Value *> Worklist; 1438 Worklist.push_back(V); 1439 while (!Worklist.empty()) { 1440 Value *V = Worklist.back(); 1441 Worklist.pop_back(); 1442 1443 for (User *U : V->users()) { 1444 if (Instruction *I = dyn_cast<Instruction>(U)) { 1445 remove(I->getParent()->getParent()); 1446 } else if (isa<GlobalValue>(U)) { 1447 // do nothing 1448 } else if (Constant *C = dyn_cast<Constant>(U)) { 1449 for (User *UU : C->users()) 1450 Worklist.push_back(UU); 1451 } 1452 } 1453 } 1454 } 1455