//===- MergeFunctions.cpp - Merge identical functions ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass looks for equivalent functions that are mergeable and folds them.
//
// A hash is computed from the function, based on its type and number of
// basic blocks.
//
// Once all hashes are computed, we perform an expensive equality comparison
// on each function pair. This takes n^2/2 comparisons per bucket, so it's
// important that the hash function be high quality. The equality comparison
// iterates through each instruction in each basic block.
//
// When a match is found the functions are folded. If both functions are
// overridable, we move the functionality into a new internal function and
// leave two overridable thunks to it.
//
//===----------------------------------------------------------------------===//
//
// Future work:
//
// * virtual functions.
//
// Many functions have their address taken by the virtual function table for
// the object they belong to. However, as long as it's only used for a lookup
// and call, this is irrelevant, and we'd like to fold such functions.
//
// * switch from n^2 pair-wise comparisons to an n-way comparison for each
// bucket.
//
// * be smarter about bitcasts.
//
// In order to fold functions, we will sometimes add either bitcast instructions
// or bitcast constant expressions. Unfortunately, this can confound further
// analysis since the two functions differ where one has a bitcast and the
// other doesn't. We should learn to look through bitcasts.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;

#define DEBUG_TYPE "mergefunc"

STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
STATISTIC(NumDoubleWeak, "Number of new functions created");

/// Returns the type id for a type to be hashed. We turn pointer types into
/// integers here because the actual compare logic below considers pointers and
/// integers of the same size as equal.
static Type::TypeID getTypeIDForHash(Type *Ty) {
  if (Ty->isPointerTy())
    return Type::IntegerTyID;
  return Ty->getTypeID();
}
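// Illustrative sketch (hypothetical IR, not part of this file): because
// pointer types hash as integers, these two functions contribute identical
// type IDs to the hash and land in the same bucket, assuming they also agree
// on block count, calling convention, GC presence and vararg-ness. The full
// comparison below then decides whether they actually merge:
//
//   define i64 @f(i8* %p) { ... }   ; pointer parameter hashes as IntegerTyID
//   define i64 @g(i64 %x) { ... }   ; integer parameter hashes as IntegerTyID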
/// Creates a hash-code for the function which is the same for any two
/// functions that will compare equal, without looking at the instructions
/// inside the function.
static unsigned profileFunction(const Function *F) {
  FunctionType *FTy = F->getFunctionType();

  FoldingSetNodeID ID;
  ID.AddInteger(F->size());
  ID.AddInteger(F->getCallingConv());
  ID.AddBoolean(F->hasGC());
  ID.AddBoolean(FTy->isVarArg());
  ID.AddInteger(getTypeIDForHash(FTy->getReturnType()));
  for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
    ID.AddInteger(getTypeIDForHash(FTy->getParamType(i)));
  return ID.ComputeHash();
}

namespace {

/// ComparableFunction - A struct that pairs together a Function with a
/// DataLayout so that we can keep them together as elements in the DenseSet.
class ComparableFunction {
public:
  static const ComparableFunction EmptyKey;
  static const ComparableFunction TombstoneKey;
  static DataLayout * const LookupOnly;

  ComparableFunction(Function *Func, const DataLayout *DL)
    : Func(Func), Hash(profileFunction(Func)), DL(DL) {}

  Function *getFunc() const { return Func; }
  unsigned getHash() const { return Hash; }
  const DataLayout *getDataLayout() const { return DL; }

  // Drops AssertingVH reference to the function. Outside of debug mode, this
  // does nothing.
  void release() {
    assert(Func &&
           "Attempted to release function twice, or release empty/tombstone!");
    Func = nullptr;
  }

private:
  explicit ComparableFunction(unsigned Hash)
    : Func(nullptr), Hash(Hash), DL(nullptr) {}

  AssertingVH<Function> Func;
  unsigned Hash;
  const DataLayout *DL;
};

const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0);
const ComparableFunction ComparableFunction::TombstoneKey =
    ComparableFunction(1);
DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1);

}

namespace llvm {
  template <>
  struct DenseMapInfo<ComparableFunction> {
    static ComparableFunction getEmptyKey() {
      return ComparableFunction::EmptyKey;
    }
    static ComparableFunction getTombstoneKey() {
      return ComparableFunction::TombstoneKey;
    }
    static unsigned getHashValue(const ComparableFunction &CF) {
      return CF.getHash();
    }
    static bool isEqual(const ComparableFunction &LHS,
                        const ComparableFunction &RHS);
  };
}

namespace {

/// FunctionComparator - Compares two functions to determine whether or not
/// they will generate machine code with the same behaviour. DataLayout is
/// used if available. The comparator always fails conservatively (erring on the
/// side of claiming that two functions are different).
class FunctionComparator {
public:
  FunctionComparator(const DataLayout *DL, const Function *F1,
                     const Function *F2)
      : F1(F1), F2(F2), DL(DL) {}

  /// Test whether the two functions have equivalent behaviour.
  bool compare();

private:
  /// Test whether two basic blocks have equivalent behaviour.
  bool compare(const BasicBlock *BB1, const BasicBlock *BB2);

  /// Constants comparison.
  /// It is analogous to lexicographical comparison between hypothetical
  /// numbers of the following format:
  /// <bitcastability-trait><raw-bit-contents>
  ///
  /// 1. Bitcastability.
  /// Check whether L's type could be losslessly bitcasted to R's type.
  /// At this stage, if a lossless bitcast is not possible, the method
  /// returns -1 or 1, thus also defining which type is greater in the
  /// context of bitcastability.
  /// Stage 0: If types are equal in terms of cmpType, then we can go straight
  ///          to the contents comparison.
  ///          If types differ, remember the types comparison result and check
  ///          whether we can still bitcast the types.
  /// Stage 1: Types that satisfy the isFirstClassType condition are always
  ///          greater than the others.
  /// Stage 2: A vector is greater than a non-vector.
  ///          If both types are vectors, then the vector with the greater
  ///          bitwidth is greater.
  ///          If both types are vectors with the same bitwidth, then the
  ///          types are bitcastable, and we can skip the other stages and go
  ///          to the contents comparison.
  /// Stage 3: Pointer types are greater than non-pointers. If both types are
  ///          pointers in the same address space, go to the contents
  ///          comparison. For different address spaces: the pointer with the
  ///          greater address space is greater.
  /// Stage 4: The types are neither vectors nor pointers, and they differ.
  ///          We don't know how to bitcast them, so we'd better not do it,
  ///          and return the types comparison result (so it determines the
  ///          relationship among constants we don't know how to bitcast).
  ///
  /// For clarity, let's see how the set of constants could look on a
  /// single-dimension axis:
  ///
  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
  /// Where: NFCT - Not a FirstClassType
  ///        FCT - FirstClassType
  ///
  /// 2. Compare raw contents.
  /// This stage ignores types and only compares the bits of L and R.
  /// Returns 0 if L and R have equivalent contents,
  /// -1 or 1 if the values differ.
  /// Pretty trivial:
  /// 2.1. If the contents are numbers, compare the numbers.
  ///      Ints with greater bitwidth are greater. Ints with the same bitwidth
  ///      are compared by their contents.
  /// 2.2. "And so on". To avoid discrepancies with the comments, it is
  ///      perhaps better to read the implementation itself.
  /// 3. And again, the overall picture. Let's look back at how the ordered
  /// set of constants looks:
  /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
  ///
  /// Now look at what could be inside [FCT, "others"], for example:
  /// [FCT, "others"] =
  /// [
  ///   [double 0.1], [double 1.23],
  ///   [i32 1], [i32 2],
  ///   { double 1.0 },      ; StructTyID, NumElements = 1
  ///   { i32 1 },           ; StructTyID, NumElements = 1
  ///   { double 1, i32 1 }, ; StructTyID, NumElements = 2
  ///   { i32 1, double 1 }  ; StructTyID, NumElements = 2
  /// ]
  ///
  /// Let's explain the order. Floating-point numbers are less than integers,
  /// just in cmpType terms: FloatTyID < IntegerTyID.
  /// Floats (with the same fltSemantics) are sorted according to their value.
  /// Then come the integers, which, like the floats, are easily sorted among
  /// themselves.
  /// Structures are grouped at the tail, again because of their TypeID:
  /// StructTyID > IntegerTyID > FloatTyID.
  /// Structures with a greater number of elements are greater. Structures
  /// whose leading elements are greater are greater.
  /// The same logic applies to vectors, arrays and other complex types.
  ///
  /// Bitcastable constants.
  /// Let's assume that some constant belongs to a group of "so-called-equal"
  /// values with different types, and at the same time belongs to another
  /// group of constants with equal types and "really" equal values.
  ///
  /// Now let's prove that this is impossible:
  ///
  /// If constant A with type TyA is bitcastable to B with type TyB, then:
  /// 1. All constants with types equal to TyA are bitcastable to B. Since
  ///    those should be vectors (if TyA is a vector), pointers
  ///    (if TyA is a pointer), or else (if TyA equals TyB), those types
  ///    should be equal to TyB.
  /// 2. All constants with non-equal but bitcastable types to TyA are
  ///    bitcastable to B.
  ///    Once again, just because we allow this for vectors and pointers only.
  ///    This statement can be expanded as below:
  /// 2.1. All vectors with bitwidth equal to vector A have bitwidth equal to
  ///      vector B, and are thus bitcastable to B as well.
  /// 2.2. All pointers in the same address space are bitcastable, no matter
  ///      what they point to. So if C is a pointer, it can be bitcasted to A
  ///      and to B.
  /// So any constant equal or bitcastable to A is equal or bitcastable to B.
  /// QED.
  ///
  /// In other words, for pointers and vectors we ignore the top-level type
  /// and look at their particular properties (bit width for vectors, and
  /// address space for pointers).
  /// If these properties are equal, compare their contents.
  int cmpConstants(const Constant *L, const Constant *R);
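  // Illustrative sketch of the bitcastability stage (hypothetical constants,
  // assuming the usual primitive type sizes): vector types of equal bitwidth
  // are treated as bitcastable, so the comparison falls through to the raw
  // contents; unequal bitwidths decide the order immediately:
  //
  //   <2 x i32> vs. <4 x i16>  ; both 64 bits wide: compare raw contents
  //   <2 x i32> vs. <2 x i64>  ; 64 vs. 128 bits: the narrower type is "less"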
  /// Assign or look up previously assigned numbers for the two values, and
  /// return whether the numbers are equal. Numbers are assigned in the order
  /// visited.
  /// Comparison order:
  /// Stage 0: A value that is the function itself is always greater than
  ///          others.
  ///          If the left and right values are references to their own
  ///          functions, then they are equal.
  /// Stage 1: Constants are greater than non-constants.
  ///          If both left and right are constants, then the result of
  ///          cmpConstants is used as the cmpValues result.
  /// Stage 2: InlineAsm instances are greater than others. If both left and
  ///          right are InlineAsm instances, the InlineAsm* pointers are cast
  ///          to integers and compared as numbers.
  /// Stage 3: For all other cases we compare the order in which we meet these
  ///          values in their functions. If the right value was met first
  ///          during scanning, then the left value is greater.
  ///          In other words, we compare serial numbers; for more details
  ///          see the comments for sn_mapL and sn_mapR.
  int cmpValues(const Value *L, const Value *R);

  bool enumerate(const Value *V1, const Value *V2) {
    return cmpValues(V1, V2) == 0;
  }
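  // Illustrative sketch of the serial numbering (hypothetical IR): scanning
  // both functions in the same CFG order assigns matching serial numbers to
  // values defined "at the same place", regardless of their names:
  //
  //   ; left function            ; right function
  //   %a = add i32 %x, 1         %p = add i32 %y, 1   ; same serial number
  //   %b = mul i32 %a, 2         %q = mul i32 %p, 2   ; same serial number
  //
  // If the right function reused %y where the left one uses %a, the serial
  // numbers would disagree and cmpValues would return non-zero.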
  /// Compare two Instructions for equivalence, similar to
  /// Instruction::isSameOperationAs but with modifications to the type
  /// comparison.
  /// Stages are listed in "most significant stage first" order:
  /// At each stage below, we compare some part of the left and right
  /// operations. If the parts are non-equal, we assign the parts comparison
  /// result to the operation comparison result and exit from the method.
  /// Otherwise we proceed to the next stage.
  /// Stages:
  /// 1. Operation opcodes. Compared as numbers.
  /// 2. Number of operands.
  /// 3. Operation types. Compared with the cmpType method.
  /// 4. Compare operation subclass optional data as a stream of bytes:
  ///    just convert it to integers and call cmpNumbers.
  /// 5. Compare operand types with cmpType, in most-significant-operand-first
  ///    order.
  /// 6. Last stage. Check operations for some specific attributes.
  ///    For example, for a Load it would be:
  ///    6.1. Load: volatile (as a boolean flag)
  ///    6.2. Load: alignment (as integer numbers)
  ///    6.3. Load: synch-scope (as integer numbers)
  ///    At this stage it is better to read the code, since it is no more than
  ///    10-15 lines per particular instruction, and may change from time to
  ///    time.
  int cmpOperation(const Instruction *L, const Instruction *R) const;

  bool isEquivalentOperation(const Instruction *I1,
                             const Instruction *I2) const {
    return cmpOperation(I1, I2) == 0;
  }
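  // Illustrative sketch (hypothetical IR): two loads that agree on opcode,
  // operand count and types, but differ in a stage-6 attribute, compare
  // non-equal:
  //
  //   %v = load i32* %p, align 4
  //   %w = load i32* %q, align 8   ; same opcode and types, but a different
  //                                ; alignment: cmpOperation returns non-zero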
  /// Compare two GEPs for equivalent pointer arithmetic.
  /// Parts to be compared for each comparison stage,
  /// most significant stage first:
  /// 1. Address space. As numbers.
  /// 2. Constant offset (if the "DataLayout *DL" field is not NULL, computed
  ///    using the GEPOperator::accumulateConstantOffset method).
  /// 3. Pointer operand type (using the cmpType method).
  /// 4. Number of operands.
  /// 5. Compare operands, using the cmpValues method.
  int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
  int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
    return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
  }

  bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) {
    return cmpGEP(GEP1, GEP2) == 0;
  }
  bool isEquivalentGEP(const GetElementPtrInst *GEP1,
                       const GetElementPtrInst *GEP2) {
    return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
  }

  /// cmpType - Compares two types: defines a total ordering among the set
  /// of types.
  ///
  /// Return values:
  /// 0 if types are equal,
  /// -1 if Left is less than Right,
  /// +1 if Left is greater than Right.
  ///
  /// Description:
  /// The comparison is broken into stages. As in lexicographical comparison,
  /// a stage coming first has higher priority.
  /// At each stage, keep the total-ordering properties in mind.
  ///
  /// 0. Before comparison we coerce pointer types in address space 0 to
  /// integers.
  /// We also don't bother when the same type appears on the left and right,
  /// and just return 0 in this case.
  ///
  /// 1. If the types are of different kinds (different type IDs),
  /// return the result of the type ID comparison, treating the IDs as
  /// numbers.
  /// 2. If the types are vectors or integers, compare the Type* values as
  /// numbers.
  /// 3. The types have the same ID, so check whether they belong to one of
  /// the following groups:
  /// * Void
  /// * Float
  /// * Double
  /// * X86_FP80
  /// * FP128
  /// * PPC_FP128
  /// * Label
  /// * Metadata
  /// If so, return 0; we can treat these types as equal just because their
  /// IDs are the same.
  /// 4. If Left and Right are pointers, return the result of the address
  /// space comparison (a numbers comparison). We can treat pointer types in
  /// the same address space as equal.
  /// 5. If the types are complex, both Left and Right are expanded and their
  /// element types are checked in the same way. If we get Res != 0 at some
  /// stage, return it. Otherwise return 0.
  /// 6. All other cases hit llvm_unreachable.
  int cmpType(Type *TyL, Type *TyR) const;

  bool isEquivalentType(Type *Ty1, Type *Ty2) const {
    return cmpType(Ty1, Ty2) == 0;
  }

  int cmpNumbers(uint64_t L, uint64_t R) const;

  int cmpAPInt(const APInt &L, const APInt &R) const;
  int cmpAPFloat(const APFloat &L, const APFloat &R) const;
  int cmpStrings(StringRef L, StringRef R) const;
  int cmpAttrs(const AttributeSet L, const AttributeSet R) const;

  // The two functions undergoing comparison.
  const Function *F1, *F2;

  const DataLayout *DL;

  /// Assign serial numbers to values from the left function, and to values
  /// from the right function.
  /// Explanation:
  /// When comparing functions, we need to compare the values we meet on the
  /// left and right sides.
  /// It's easy to sort things out for external values: it should just be the
  /// same value on the left and on the right.
  /// But for local values (those introduced inside the function body) we have
  /// to ensure they were introduced at exactly the same place, and play the
  /// same role.
  /// So let's assign a serial number to each value when we meet it for the
  /// first time. Values met at the same place get the same serial numbers.
  /// Given this, it is worth explaining a few points about the values
  /// assigned to BBs and some other implementation choices (see below).
  ///
  /// 1. Safety of BB reordering.
  /// It's safe to change the order of BasicBlocks in a function.
  /// The relationship with other functions and the serial numbering will not
  /// change in this case.
  /// As follows from FunctionComparator::compare(), we do a CFG walk: we
  /// start from the entry block and then take each terminator in turn. So it
  /// doesn't matter how the BBs are actually ordered in the function. And
  /// since cmpValues is called during this walk, the numbering depends only
  /// on how the BBs are located inside the CFG.
  /// So the answer is yes: we will get the same numbering.
  ///
  /// 2. Impossibility of using dominance properties of values.
  /// If we compare two instruction operands, where the first is a use of
  /// local variable AL from function FL, and the second is a use of local
  /// variable AR from FR, we could compare their origins and check whether
  /// they are defined at the same place.
  /// But we are still not able to compare operands of PHI nodes, since those
  /// could be operands from later BBs we haven't scanned yet.
  /// So it's impossible to use dominance properties in general.
  DenseMap<const Value*, int> sn_mapL, sn_mapR;
};

}

int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}

int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
    return Res;
  if (L.ugt(R)) return 1;
  if (R.ugt(L)) return -1;
  return 0;
}

int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const {
  if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
                           (uint64_t)&R.getSemantics()))
    return Res;
  return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt());
}

int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
  // Prevent heavy comparison, compare sizes first.
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res;

  // Compare strings lexicographically only when it is necessary: only when
  // strings are equal in size.
  return L.compare(R);
}

int FunctionComparator::cmpAttrs(const AttributeSet L,
                                 const AttributeSet R) const {
  if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
    return Res;

  for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
    AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
                           RE = R.end(i);
    for (; LI != LE && RI != RE; ++LI, ++RI) {
      Attribute LA = *LI;
      Attribute RA = *RI;
      if (LA < RA)
        return -1;
      if (RA < LA)
        return 1;
    }
    if (LI != LE)
      return 1;
    if (RI != RE)
      return -1;
  }
  return 0;
}

/// Constants comparison:
/// 1. Check whether the type of the L constant could be losslessly bitcasted
///    to the type of the R constant.
/// 2. Compare the constant contents.
/// For more details see the declaration comments.
int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {

  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether the types are bitcastable. This part is just a refactored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // it also packs into the result which type is "less" for us.
  int TypesRes = cmpType(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;
      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types.
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);

    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.

  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;

  switch (L->getValueID()) {
  case Value::UndefValueVal: return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInt(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloat(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::FunctionVal:
  case Value::GlobalVariableVal:
  case Value::GlobalAliasVal:
  default: // Unknown constant, cast L and R pointers to numbers and compare.
    return cmpNumbers((uint64_t)L, (uint64_t)R);
  }
}
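// Worked example of the contents stage above (hypothetical constants of the
// same type, so TypesRes == 0):
//
//   cmpConstants(i32 0, i32 0) ->  0   ; both are null values
//   cmpConstants(i32 0, i32 1) ->  1   ; a null value sorts after non-null
//   cmpConstants(i32 1, i32 2) -> -1   ; ConstantInts compared via cmpAPInt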
/// cmpType - Compares two types: defines a total ordering among the set of
/// types.
/// See the method declaration comments for more details.
int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {

  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  if (DL) {
    if (PTyL && PTyL->getAddressSpace() == 0) TyL = DL->getIntPtrType(TyL);
    if (PTyR && PTyR->getAddressSpace() == 0) TyR = DL->getIntPtrType(TyR);
  }

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
    // Fall through in Release mode.
  case Type::IntegerTyID:
  case Type::VectorTyID:
    // TyL == TyR would have returned 0 earlier.
    return cmpNumbers((uint64_t)TyL, (uint64_t)TyR);

  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
    return 0;

  case Type::PointerTyID: {
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
  }

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpType(STyL->getElementType(i),
                            STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpType(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpType(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID: {
    ArrayType *ATyL = cast<ArrayType>(TyL);
    ArrayType *ATyR = cast<ArrayType>(TyR);
    if (ATyL->getNumElements() != ATyR->getNumElements())
      return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
    return cmpType(ATyL->getElementType(), ATyR->getElementType());
  }
  }
}
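// Illustrative sketch (assuming a DataLayout with 64-bit pointers): pointer
// types in address space 0 are coerced to the target's intptr type before
// comparison, so distinct pointee types compare equal:
//
//   cmpType(i8*, i32*) -> 0         ; both coerce to i64 first
//   cmpType(i8*, i64)  -> 0         ; pointer vs. same-width integer
//   cmpType(i8*, i32)  -> non-zero  ; i64 vs. i32: distinct integer types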
// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read the method declaration comments for more details.
int FunctionComparator::cmpOperation(const Instruction *L,
                                     const Instruction *R) const {
  // Differences from Instruction::isSameOperationAs:
  // * replace type comparison with calls to isEquivalentType.
  // * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top.
  // * because of the above, we don't test for the tail bit on calls later on.
  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
    return Res;

  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;

  if (int Res = cmpType(L->getType(), R->getType()))
    return Res;

  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
                           R->getRawSubclassOptionalData()))
    return Res;

  // We have two instructions of identical opcode and #operands. Check to see
  // if all operands are the same type.
  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
    if (int Res =
            cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
      return Res;
  }

  // Check special state that is a part of some instructions.
  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope());
  }
  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
    if (int Res =
            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
  }
  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
  if (const CallInst *CI = dyn_cast<CallInst>(L)) {
    if (int Res = cmpNumbers(CI->getCallingConv(),
                             cast<CallInst>(R)->getCallingConv()))
      return Res;
    return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
  }
  if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
    if (int Res = cmpNumbers(CI->getCallingConv(),
                             cast<InvokeInst>(R)->getCallingConv()))
      return Res;
    return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
  }
  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
    ArrayRef<unsigned> LIndices = IVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
    ArrayRef<unsigned> LIndices = EVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
  }
  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
    if (int Res =
            cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
  }
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
    if (int Res = cmpNumbers(CXI->isVolatile(),
                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
                             cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
      return Res;
    if (int Res = cmpNumbers(CXI->getFailureOrdering(),
                             cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
      return Res;
    return cmpNumbers(CXI->getSynchScope(),
                      cast<AtomicCmpXchgInst>(R)->getSynchScope());
  }
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
    if (int Res = cmpNumbers(RMWI->getOperation(),
                             cast<AtomicRMWInst>(R)->getOperation()))
      return Res;
    if (int Res = cmpNumbers(RMWI->isVolatile(),
                             cast<AtomicRMWInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpNumbers(RMWI->getOrdering(),
                             cast<AtomicRMWInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(RMWI->getSynchScope(),
                      cast<AtomicRMWInst>(R)->getSynchScope());
  }
  return 0;
}

// Determine whether two GEP operations perform the same underlying arithmetic.
// Read the method declaration comments for more details.
int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
                               const GEPOperator *GEPR) {

  unsigned int ASL = GEPL->getPointerAddressSpace();
  unsigned int ASR = GEPR->getPointerAddressSpace();

  if (int Res = cmpNumbers(ASL, ASR))
    return Res;

  // When we have target data, we can reduce the GEP down to the value in bytes
  // added to the address.
  if (DL) {
    unsigned BitWidth = DL->getPointerSizeInBits(ASL);
    APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
    if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
        GEPR->accumulateConstantOffset(*DL, OffsetR))
      return cmpAPInt(OffsetL, OffsetR);
  }

  if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
                           (uint64_t)GEPR->getPointerOperand()->getType()))
    return Res;

  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
    return Res;

  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
      return Res;
  }

  return 0;
}
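// Illustrative sketch (assuming DataLayout is available and i32 is 4 bytes):
// two GEPs with different index lists but the same constant byte offset
// compare equal once reduced to offsets:
//
//   getelementptr i8* %p, i64 8    ; offset = 8 bytes
//   getelementptr i32* %q, i64 2   ; offset = 2 * 4 = 8 bytes
//                                  ; => cmpGEP returns 0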
/// Compare two values used by the two functions under pair-wise comparison.
/// If this is the first time the values are seen, they're added to the
/// mapping so that we will detect mismatches on the next use.
/// See the comments in the declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) {
  // Catch the self-reference case.
  if (L == F1) {
    if (R == F2)
      return 0;
    return -1;
  }
  if (R == F2) {
    if (L == F1)
      return 0;
    return 1;
  }

  const Constant *ConstL = dyn_cast<Constant>(L);
  const Constant *ConstR = dyn_cast<Constant>(R);
  if (ConstL && ConstR) {
    if (L == R)
      return 0;
    return cmpConstants(ConstL, ConstR);
  }

  if (ConstL)
    return 1;
  if (ConstR)
    return -1;

  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);

  if (InlineAsmL && InlineAsmR)
    return cmpNumbers((uint64_t)L, (uint64_t)R);
  if (InlineAsmL)
    return 1;
  if (InlineAsmR)
    return -1;

  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));

  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}

// Test whether two basic blocks have equivalent behaviour.
bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
  BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
  BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();

  do {
    if (!enumerate(F1I, F2I))
      return false;

    if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
      const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
      if (!GEP2)
        return false;

      if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
        return false;

      if (!isEquivalentGEP(GEP1, GEP2))
        return false;
    } else {
      if (!isEquivalentOperation(F1I, F2I))
        return false;

      assert(F1I->getNumOperands() == F2I->getNumOperands());
      for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
        Value *OpF1 = F1I->getOperand(i);
        Value *OpF2 = F2I->getOperand(i);

        if (!enumerate(OpF1, OpF2))
          return false;

        if (OpF1->getValueID() != OpF2->getValueID() ||
            !isEquivalentType(OpF1->getType(), OpF2->getType()))
          return false;
      }
    }

    ++F1I, ++F2I;
  } while (F1I != F1E && F2I != F2E);

  return F1I == F1E && F2I == F2E;
}

// Test whether the two functions have equivalent behaviour.
bool FunctionComparator::compare() {
  // We need to recheck everything, but check the things that weren't included
  // in the hash first.

  sn_mapL.clear();
  sn_mapR.clear();

  if (F1->getAttributes() != F2->getAttributes())
    return false;

  if (F1->hasGC() != F2->hasGC())
    return false;

  if (F1->hasGC() && F1->getGC() != F2->getGC())
    return false;

  if (F1->hasSection() != F2->hasSection())
    return false;

  if (F1->hasSection() && F1->getSection() != F2->getSection())
    return false;

  if (F1->isVarArg() != F2->isVarArg())
    return false;

  // TODO: if it's internal and only used in direct calls, we could handle this
  // case too.
  if (F1->getCallingConv() != F2->getCallingConv())
    return false;

  if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
    return false;

  assert(F1->arg_size() == F2->arg_size() &&
         "Identically typed functions have different numbers of args!");

  // Visit the arguments so that they get enumerated in the order they're
  // passed in.
  for (Function::const_arg_iterator f1i = F1->arg_begin(),
         f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
    if (!enumerate(f1i, f2i))
      llvm_unreachable("Arguments repeat!");
  }

  // We do a CFG-ordered walk since the actual ordering of the blocks in the
  // linked list is immaterial. Our walk starts at the entry block for both
  // functions, then takes each block from each terminator in order. As an
  // artifact, this also means that unreachable blocks are ignored.
  SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
  SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.

  F1BBs.push_back(&F1->getEntryBlock());
  F2BBs.push_back(&F2->getEntryBlock());

  VisitedBBs.insert(F1BBs[0]);
  while (!F1BBs.empty()) {
    const BasicBlock *F1BB = F1BBs.pop_back_val();
    const BasicBlock *F2BB = F2BBs.pop_back_val();

    if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB))
      return false;

    const TerminatorInst *F1TI = F1BB->getTerminator();
    const TerminatorInst *F2TI = F2BB->getTerminator();

    assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
    for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
        continue;

      F1BBs.push_back(F1TI->getSuccessor(i));
      F2BBs.push_back(F2TI->getSuccessor(i));
    }
  }
  return true;
}

namespace {

/// MergeFunctions finds functions which will generate identical machine code,
/// by considering all pointer types to be equivalent. Once identified,
/// MergeFunctions will fold them by replacing a call to one with a call to a
/// bitcast of the other.
///
class MergeFunctions : public ModulePass {
public:
  static char ID;
  MergeFunctions()
    : ModulePass(ID), HasGlobalAliases(false) {
    initializeMergeFunctionsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override;

private:
  typedef DenseSet<ComparableFunction> FnSetType;

  /// A work queue of functions that may have been modified and should be
  /// analyzed again.
  std::vector<WeakVH> Deferred;

  /// Insert a ComparableFunction into the FnSet, or merge it away if it's
  /// equal to one that's already present.
  bool insert(ComparableFunction &NewF);

  /// Remove a Function from the FnSet and queue it up for a second sweep of
  /// analysis.
  void remove(Function *F);

  /// Find the functions that use this Value and remove them from the FnSet
  /// and queue the functions.
  void removeUsers(Value *V);

  /// Replace all direct calls of Old with calls of New. Will bitcast New if
  /// necessary to make types match.
  void replaceDirectCallers(Function *Old, Function *New);

  /// Merge two equivalent functions. Upon completion, G may be deleted, or may
  /// be converted into a thunk. In either case, it should never be visited
  /// again.
  void mergeTwoFunctions(Function *F, Function *G);

  /// Replace G with a thunk or an alias to F. Deletes G.
  void writeThunkOrAlias(Function *F, Function *G);

  /// Replace G with a simple tail call to bitcast(F). Also replace direct uses
  /// of G with bitcast(F). Deletes G.
  void writeThunk(Function *F, Function *G);

  /// Replace G with an alias to F. Deletes G.
  void writeAlias(Function *F, Function *G);

  /// The set of all distinct functions. Use the insert() and remove() methods
  /// to modify it.
  FnSetType FnSet;

  /// DataLayout for more accurate GEP comparisons. May be NULL.
  const DataLayout *DL;

  /// Whether or not the target supports global aliases.
  bool HasGlobalAliases;
};

} // end anonymous namespace

char MergeFunctions::ID = 0;
INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false)

ModulePass *llvm::createMergeFunctionsPass() {
  return new MergeFunctions();
}

bool MergeFunctions::runOnModule(Module &M) {
  bool Changed = false;
  DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  DL = DLP ? &DLP->getDataLayout() : nullptr;

  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
    if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
      Deferred.push_back(WeakVH(I));
  }
  FnSet.resize(Deferred.size());

  do {
    std::vector<WeakVH> Worklist;
    Deferred.swap(Worklist);

    DEBUG(dbgs() << "size of module: " << M.size() << '\n');
    DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');

    // Insert only strong functions and merge them. Strong function merging
    // always deletes one of them.
    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
           E = Worklist.end(); I != E; ++I) {
      if (!*I) continue;
      Function *F = cast<Function>(*I);
      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
          !F->mayBeOverridden()) {
        ComparableFunction CF = ComparableFunction(F, DL);
        Changed |= insert(CF);
      }
    }

    // Insert only weak functions and merge them. By doing these second, we
    // create thunks to the strong function when possible. When two weak
    // functions are identical, we create a new strong function with two weak
    // thunks to it which are identical but not mergeable.
    for (std::vector<WeakVH>::iterator I = Worklist.begin(),
           E = Worklist.end(); I != E; ++I) {
      if (!*I) continue;
      Function *F = cast<Function>(*I);
      if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() &&
          F->mayBeOverridden()) {
        ComparableFunction CF = ComparableFunction(F, DL);
        Changed |= insert(CF);
      }
    }
    DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n');
  } while (!Deferred.empty());

  FnSet.clear();

  return Changed;
}

bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS,
                                               const ComparableFunction &RHS) {
  if (LHS.getFunc() == RHS.getFunc() &&
      LHS.getHash() == RHS.getHash())
    return true;
  if (!LHS.getFunc() || !RHS.getFunc())
    return false;

  // One of these is a special "underlying pointer comparison only" object.
  if (LHS.getDataLayout() == ComparableFunction::LookupOnly ||
      RHS.getDataLayout() == ComparableFunction::LookupOnly)
    return false;

  assert(LHS.getDataLayout() == RHS.getDataLayout() &&
         "Comparing functions for different targets");

  return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(),
                            RHS.getFunc()).compare();
}
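// Illustrative sketch of the call-site rewrite performed by
// replaceDirectCallers below (hypothetical IR): a direct call to @g is
// redirected through a constant-expression bitcast of @f when the two
// function types differ only in pointee type:
//
//   call void @g(i8* %p)
//   ; becomes:
//   call void bitcast (void (i32*)* @f to void (i8*)*)(i8* %p)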
// Replace direct callers of Old with New.
void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) {
  Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType());
  for (auto UI = Old->use_begin(), UE = Old->use_end(); UI != UE;) {
    Use *U = &*UI;
    ++UI;
    CallSite CS(U->getUser());
    if (CS && CS.isCallee(U)) {
      remove(CS.getInstruction()->getParent()->getParent());
      U->set(BitcastNew);
    }
  }
}

// Replace G with an alias to F if possible, or else a thunk to F. Deletes G.
void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
  if (HasGlobalAliases && G->hasUnnamedAddr()) {
    if (G->hasExternalLinkage() || G->hasLocalLinkage() ||
        G->hasWeakLinkage()) {
      writeAlias(F, G);
      return;
    }
  }

  writeThunk(F, G);
}

// Helper for writeThunk. Selects the proper cast operation, but is a bit
// simpler than CastInst::getCastOpcode.
static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
  Type *SrcTy = V->getType();
  if (SrcTy->isStructTy()) {
    assert(DestTy->isStructTy());
    assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
    Value *Result = UndefValue::get(DestTy);
    for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
      Value *Element = createCast(
          Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)),
          DestTy->getStructElementType(I));

      Result =
          Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I));
    }
    return Result;
  }
  assert(!DestTy->isStructTy());
  if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
    return Builder.CreateIntToPtr(V, DestTy);
  else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
    return Builder.CreatePtrToInt(V, DestTy);
  else
    return Builder.CreateBitCast(V, DestTy);
}
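// Illustrative sketch (hypothetical types): createCast handles struct values
// element-wise, uses inttoptr/ptrtoint for integer/pointer mismatches, and
// falls back to a plain bitcast otherwise. For example, casting a value of
// type { i8*, i32 } to { i64, i32 } extracts each element, converts the i8*
// to i64 via ptrtoint, leaves the i32 alone (an identity bitcast), and
// rebuilds the struct with insertvalue starting from undef.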
// Replace G with a simple tail call to bitcast(F). Also replace direct uses
// of G with bitcast(F). Deletes G.
void MergeFunctions::writeThunk(Function *F, Function *G) {
  if (!G->mayBeOverridden()) {
    // Redirect direct callers of G to F.
    replaceDirectCallers(G, F);
  }

  // If G was internal then we may have replaced all uses of G with F. If so,
  // stop here and delete G. There's no need for a thunk.
  if (G->hasLocalLinkage() && G->use_empty()) {
    G->eraseFromParent();
    return;
  }

  Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
                                    G->getParent());
  BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
  IRBuilder<false> Builder(BB);

  SmallVector<Value *, 16> Args;
  unsigned i = 0;
  FunctionType *FFTy = F->getFunctionType();
  for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
       AI != AE; ++AI) {
    Args.push_back(createCast(Builder, (Value*)AI, FFTy->getParamType(i)));
    ++i;
  }

  CallInst *CI = Builder.CreateCall(F, Args);
  CI->setTailCall();
  CI->setCallingConv(F->getCallingConv());
  if (NewG->getReturnType()->isVoidTy()) {
    Builder.CreateRetVoid();
  } else {
    Builder.CreateRet(createCast(Builder, CI, NewG->getReturnType()));
  }

  NewG->copyAttributesFrom(G);
  NewG->takeName(G);
  removeUsers(G);
  G->replaceAllUsesWith(NewG);
  G->eraseFromParent();

  DEBUG(dbgs() << "writeThunk: " << NewG->getName() << '\n');
  ++NumThunksWritten;
}
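// Illustrative sketch of a generated thunk (hypothetical IR): after merging,
// the body of @g is replaced by a tail-call forwarder to @f, with arguments
// and the return value run through createCast where the types differ:
//
//   define i32 @g(i32 %x) {
//     %1 = tail call i32 @f(i32 %x)
//     ret i32 %1
//   }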
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
  PointerType *PTy = G->getType();
  auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
                                 G->getLinkage(), "", F);
  F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
  GA->takeName(G);
  GA->setVisibility(G->getVisibility());
  removeUsers(G);
  G->replaceAllUsesWith(GA);
  G->eraseFromParent();

  DEBUG(dbgs() << "writeAlias: " << GA->getName() << '\n');
  ++NumAliasesWritten;
}

// Merge two equivalent functions. Upon completion, Function G is deleted.
void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) {
  if (F->mayBeOverridden()) {
    assert(G->mayBeOverridden());

    if (HasGlobalAliases) {
      // Make them both thunks to the same internal function.
      Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
                                     F->getParent());
      H->copyAttributesFrom(F);
      H->takeName(F);
      removeUsers(F);
      F->replaceAllUsesWith(H);

      unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());

      writeAlias(F, G);
      writeAlias(F, H);

      F->setAlignment(MaxAlignment);
      F->setLinkage(GlobalValue::PrivateLinkage);
    } else {
      // We can't merge them. Instead, pick one and update all direct callers
      // to call it and hope that we improve the instruction cache hit rate.
      replaceDirectCallers(G, F);
    }

    ++NumDoubleWeak;
  } else {
    writeThunkOrAlias(F, G);
  }

  ++NumFunctionsMerged;
}

// Insert a ComparableFunction into the FnSet, or merge it away if equal to one
// that was already inserted.
bool MergeFunctions::insert(ComparableFunction &NewF) {
  std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
  if (Result.second) {
    DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n');
    return false;
  }

  const ComparableFunction &OldF = *Result.first;

  // Don't merge tiny functions, since it can just end up making the function
  // larger.
  // FIXME: Should still merge them if they are unnamed_addr and produce an
  // alias.
  if (NewF.getFunc()->size() == 1) {
    if (NewF.getFunc()->front().size() <= 2) {
      DEBUG(dbgs() << NewF.getFunc()->getName()
                   << " is too small to bother merging\n");
      return false;
    }
  }

  // Never thunk a strong function to a weak function.
  assert(!OldF.getFunc()->mayBeOverridden() ||
         NewF.getFunc()->mayBeOverridden());

  DEBUG(dbgs() << "  " << OldF.getFunc()->getName() << " == "
               << NewF.getFunc()->getName() << '\n');

  Function *DeleteF = NewF.getFunc();
  NewF.release();
  mergeTwoFunctions(OldF.getFunc(), DeleteF);
  return true;
}

// Remove a function from FnSet. If it was already in FnSet, add it to Deferred
// so that we'll look at it in the next round.
void MergeFunctions::remove(Function *F) {
  // We need to make sure we remove F, not a function "equal" to F per the
  // function equality comparator.
  //
  // The special "lookup only" ComparableFunction bypasses the expensive
  // function comparison in favour of a pointer comparison on the underlying
  // Function*'s.
  ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly);
  if (FnSet.erase(CF)) {
    DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n");
    Deferred.push_back(F);
  }
}

// For each instruction used by the value, remove() the function that contains
// the instruction. This should happen right before a call to RAUW.
void MergeFunctions::removeUsers(Value *V) {
  std::vector<Value *> Worklist;
  Worklist.push_back(V);
  while (!Worklist.empty()) {
    Value *V = Worklist.back();
    Worklist.pop_back();

    for (User *U : V->users()) {
      if (Instruction *I = dyn_cast<Instruction>(U)) {
        remove(I->getParent()->getParent());
      } else if (isa<GlobalValue>(U)) {
        // Do nothing.
      } else if (Constant *C = dyn_cast<Constant>(U)) {
        for (User *UU : C->users())
          Worklist.push_back(UU);
      }
    }
  }
}