//===- FunctionComparator.cpp - Function Comparator ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the FunctionComparator and GlobalNumberState classes
// which are used by the MergeFunctions pass for comparing functions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/FunctionComparator.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "functioncomparator"

int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
  if (L < R) return -1;
  if (L > R) return 1;
  return 0;
}

int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
  if ((int)L < (int)R) return -1;
  if ((int)L > (int)R) return 1;
  return 0;
}

int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
  if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
    return Res;
  if (L.ugt(R)) return 1;
  if (R.ugt(L)) return -1;
  return 0;
}

int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
  // Floats are ordered first by semantics (i.e. float, double, half, etc.),
  // then by value interpreted as a bitstring (aka APInt).
  const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
  if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
                           APFloat::semanticsPrecision(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
                           APFloat::semanticsMaxExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
                           APFloat::semanticsMinExponent(SR)))
    return Res;
  if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
                           APFloat::semanticsSizeInBits(SR)))
    return Res;
  return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
}
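
// Because the final tie-breaker above is the raw bit pattern, floating-point
// constants that are numerically equal but bitwise distinct (e.g. +0.0 and
// -0.0, or NaNs with different payloads) deliberately compare unequal.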

int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
  // Prevent a heavy comparison: compare sizes first.
  if (int Res = cmpNumbers(L.size(), R.size()))
    return Res;

  // Compare the strings lexicographically only when it is necessary, i.e.
  // only when they are equal in size.
  return L.compare(R);
}

int FunctionComparator::cmpAttrs(const AttributeList L,
                                 const AttributeList R) const {
  if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
    return Res;

  for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
    AttributeSet LAS = L.getAttributes(i);
    AttributeSet RAS = R.getAttributes(i);
    AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
    AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
    for (; LI != LE && RI != RE; ++LI, ++RI) {
      Attribute LA = *LI;
      Attribute RA = *RI;
      if (LA < RA)
        return -1;
      if (RA < LA)
        return 1;
    }
    if (LI != LE)
      return 1;
    if (RI != RE)
      return -1;
  }
  return 0;
}

int FunctionComparator::cmpRangeMetadata(const MDNode *L,
                                         const MDNode *R) const {
  if (L == R)
    return 0;
  if (!L)
    return -1;
  if (!R)
    return 1;
  // Range metadata is a sequence of numbers. Make sure they are the same
  // sequence.
  // TODO: Note that as this is metadata, it is possible to drop and/or merge
  // this data when considering functions to merge. Thus this comparison would
  // return 0 (i.e. equivalent), but merging would become more complicated
  // because the ranges would need to be unioned. It is not likely that
  // functions differ ONLY in this metadata if they are actually the same
  // function semantically.
  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;
  for (size_t I = 0; I < L->getNumOperands(); ++I) {
    ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
    ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
      return Res;
  }
  return 0;
}

int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
                                                const Instruction *R) const {
  ImmutableCallSite LCS(L);
  ImmutableCallSite RCS(R);

  assert(LCS && RCS && "Must be calls or invokes!");
  assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");

  if (int Res =
          cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
    return Res;

  for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
    auto OBL = LCS.getOperandBundleAt(i);
    auto OBR = RCS.getOperandBundleAt(i);

    if (int Res = OBL.getTagName().compare(OBR.getTagName()))
      return Res;

    if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
      return Res;
  }

  return 0;
}
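
// Note that cmpOperandBundlesSchema only compares the bundle tags and input
// counts; the bundle inputs themselves are operands of the underlying call
// instruction and are compared value-by-value in cmpBasicBlocks.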

/// Constants comparison:
/// 1. Check whether type of L constant could be losslessly bitcasted to R
///    type.
/// 2. Compare constant contents.
/// For more details see declaration comments.
int FunctionComparator::cmpConstants(const Constant *L,
                                     const Constant *R) const {
  Type *TyL = L->getType();
  Type *TyR = R->getType();

  // Check whether types are bitcastable. This part is just re-factored
  // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
  // we also pack into result which type is "less" for us.
  int TypesRes = cmpTypes(TyL, TyR);
  if (TypesRes != 0) {
    // Types are different, but check whether we can bitcast them.
    if (!TyL->isFirstClassType()) {
      if (TyR->isFirstClassType())
        return -1;
      // Neither TyL nor TyR are values of first class type. Return the result
      // of comparing the types.
      return TypesRes;
    }
    if (!TyR->isFirstClassType()) {
      if (TyL->isFirstClassType())
        return 1;
      return TypesRes;
    }

    // Vector -> Vector conversions are always lossless if the two vector types
    // have the same size, otherwise not.
    unsigned TyLWidth = 0;
    unsigned TyRWidth = 0;

    if (auto *VecTyL = dyn_cast<VectorType>(TyL))
      TyLWidth = VecTyL->getBitWidth();
    if (auto *VecTyR = dyn_cast<VectorType>(TyR))
      TyRWidth = VecTyR->getBitWidth();

    if (TyLWidth != TyRWidth)
      return cmpNumbers(TyLWidth, TyRWidth);

    // Zero bit-width means neither TyL nor TyR are vectors.
    if (!TyLWidth) {
      PointerType *PTyL = dyn_cast<PointerType>(TyL);
      PointerType *PTyR = dyn_cast<PointerType>(TyR);
      if (PTyL && PTyR) {
        unsigned AddrSpaceL = PTyL->getAddressSpace();
        unsigned AddrSpaceR = PTyR->getAddressSpace();
        if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
          return Res;
      }
      if (PTyL)
        return 1;
      if (PTyR)
        return -1;

      // TyL and TyR aren't vectors, nor pointers. We don't know how to
      // bitcast them.
      return TypesRes;
    }
  }

  // OK, types are bitcastable, now check constant contents.

  if (L->isNullValue() && R->isNullValue())
    return TypesRes;
  if (L->isNullValue() && !R->isNullValue())
    return 1;
  if (!L->isNullValue() && R->isNullValue())
    return -1;

  auto GlobalValueL = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(L));
  auto GlobalValueR = const_cast<GlobalValue *>(dyn_cast<GlobalValue>(R));
  if (GlobalValueL && GlobalValueR) {
    return cmpGlobalValues(GlobalValueL, GlobalValueR);
  }

  if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
    return Res;

  if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
    const auto *SeqR = cast<ConstantDataSequential>(R);
    // This handles ConstantDataArray and ConstantDataVector. Note that we
    // compare the two raw data arrays, which might differ depending on the
    // host endianness. This isn't a problem though, because the endianness of
    // a module will affect the order of the constants, but this order is the
    // same for a given input module and host platform.
    return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
  }
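
  // The switch below can assume L and R are constants of the same kind: the
  // ValueIDs were just compared, so within each case the cast<> applied to R
  // is safe.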

  switch (L->getValueID()) {
  case Value::UndefValueVal:
  case Value::ConstantTokenNoneVal:
    return TypesRes;
  case Value::ConstantIntVal: {
    const APInt &LInt = cast<ConstantInt>(L)->getValue();
    const APInt &RInt = cast<ConstantInt>(R)->getValue();
    return cmpAPInts(LInt, RInt);
  }
  case Value::ConstantFPVal: {
    const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
    const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
    return cmpAPFloats(LAPF, RAPF);
  }
  case Value::ConstantArrayVal: {
    const ConstantArray *LA = cast<ConstantArray>(L);
    const ConstantArray *RA = cast<ConstantArray>(R);
    uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
    uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
                                 cast<Constant>(RA->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantStructVal: {
    const ConstantStruct *LS = cast<ConstantStruct>(L);
    const ConstantStruct *RS = cast<ConstantStruct>(R);
    unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (unsigned i = 0; i != NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
                                 cast<Constant>(RS->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantVectorVal: {
    const ConstantVector *LV = cast<ConstantVector>(L);
    const ConstantVector *RV = cast<ConstantVector>(R);
    unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
    unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
    if (int Res = cmpNumbers(NumElementsL, NumElementsR))
      return Res;
    for (uint64_t i = 0; i < NumElementsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
                                 cast<Constant>(RV->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::ConstantExprVal: {
    const ConstantExpr *LE = cast<ConstantExpr>(L);
    const ConstantExpr *RE = cast<ConstantExpr>(R);
    // All constant expressions share a single ValueID, so the expression
    // opcode must be compared explicitly; the operands alone cannot
    // distinguish e.g. an add from a sub.
    if (int Res = cmpNumbers(LE->getOpcode(), RE->getOpcode()))
      return Res;
    unsigned NumOperandsL = LE->getNumOperands();
    unsigned NumOperandsR = RE->getNumOperands();
    if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
      return Res;
    for (unsigned i = 0; i < NumOperandsL; ++i) {
      if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
                                 cast<Constant>(RE->getOperand(i))))
        return Res;
    }
    return 0;
  }
  case Value::BlockAddressVal: {
    const BlockAddress *LBA = cast<BlockAddress>(L);
    const BlockAddress *RBA = cast<BlockAddress>(R);
    if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
      return Res;
    if (LBA->getFunction() == RBA->getFunction()) {
      // They are BBs in the same function. Order by which comes first in the
      // BB order of the function. This order is deterministic.
      Function *F = LBA->getFunction();
      BasicBlock *LBB = LBA->getBasicBlock();
      BasicBlock *RBB = RBA->getBasicBlock();
      if (LBB == RBB)
        return 0;
      for (BasicBlock &BB : F->getBasicBlockList()) {
        if (&BB == LBB) {
          assert(&BB != RBB);
          return -1;
        }
        if (&BB == RBB)
          return 1;
      }
      llvm_unreachable("Basic Block Address does not point to a basic block in "
                       "its function.");
      return -1;
    } else {
      // cmpValues said the functions are the same. So because they aren't
      // literally the same pointer, they must respectively be the left and
      // right functions.
      assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
      // cmpValues will tell us if these are equivalent BasicBlocks, in the
      // context of their respective functions.
      return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
    }
  }
  default: // Unknown constant, abort.
    LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
    llvm_unreachable("Constant ValueID not recognized.");
    return -1;
  }
}

int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
  uint64_t LNumber = GlobalNumbers->getNumber(L);
  uint64_t RNumber = GlobalNumbers->getNumber(R);
  return cmpNumbers(LNumber, RNumber);
}
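
// Globals are ordered by an identity-based serial number rather than by name
// or contents: GlobalNumberState hands out numbers in order of first
// encounter, so references to the same global always compare equal, while
// references to different globals order deterministically for a given module.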

/// cmpTypes - compares two types and defines a total ordering over the set of
/// types.
/// See method declaration comments for more details.
int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
  PointerType *PTyL = dyn_cast<PointerType>(TyL);
  PointerType *PTyR = dyn_cast<PointerType>(TyR);

  const DataLayout &DL = FnL->getParent()->getDataLayout();
  if (PTyL && PTyL->getAddressSpace() == 0)
    TyL = DL.getIntPtrType(TyL);
  if (PTyR && PTyR->getAddressSpace() == 0)
    TyR = DL.getIntPtrType(TyR);

  if (TyL == TyR)
    return 0;

  if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
    return Res;

  switch (TyL->getTypeID()) {
  default:
    llvm_unreachable("Unknown type!");
  case Type::IntegerTyID:
    return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
                      cast<IntegerType>(TyR)->getBitWidth());
  // TyL == TyR would have returned true earlier, because types are uniqued.
  case Type::VoidTyID:
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::X86_FP80TyID:
  case Type::FP128TyID:
  case Type::PPC_FP128TyID:
  case Type::LabelTyID:
  case Type::MetadataTyID:
  case Type::TokenTyID:
    return 0;

  case Type::PointerTyID:
    assert(PTyL && PTyR && "Both types must be pointers here.");
    return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());

  case Type::StructTyID: {
    StructType *STyL = cast<StructType>(TyL);
    StructType *STyR = cast<StructType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());

    if (STyL->isPacked() != STyR->isPacked())
      return cmpNumbers(STyL->isPacked(), STyR->isPacked());

    for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
      if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
        return Res;
    }
    return 0;
  }

  case Type::FunctionTyID: {
    FunctionType *FTyL = cast<FunctionType>(TyL);
    FunctionType *FTyR = cast<FunctionType>(TyR);
    if (FTyL->getNumParams() != FTyR->getNumParams())
      return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());

    if (FTyL->isVarArg() != FTyR->isVarArg())
      return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());

    if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
      return Res;

    for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
      if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
        return Res;
    }
    return 0;
  }

  case Type::ArrayTyID:
  case Type::VectorTyID: {
    auto *STyL = cast<SequentialType>(TyL);
    auto *STyR = cast<SequentialType>(TyR);
    if (STyL->getNumElements() != STyR->getNumElements())
      return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
    return cmpTypes(STyL->getElementType(), STyR->getElementType());
  }
  }
}
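
// Note that pointer types are compared only by address space: pointers in
// address space 0 are first canonicalized to the DataLayout's integer pointer
// type above, so e.g. i8* and i32* compare equal here. cmpConstants and
// cmpOperations rely on this looser equivalence to merge functions that
// differ only in pointee types.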

// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
// Read method declaration comments for more details.
int FunctionComparator::cmpOperations(const Instruction *L,
                                      const Instruction *R,
                                      bool &needToCmpOperands) const {
  needToCmpOperands = true;
  if (int Res = cmpValues(L, R))
    return Res;

  // Differences from Instruction::isSameOperationAs:
  //  * replace type comparison with calls to cmpTypes.
  //  * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
  //  * because of the above, we don't test for the tail bit on calls later on.
  if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
    return Res;

  if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
    needToCmpOperands = false;
    const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
    if (int Res =
            cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
      return Res;
    return cmpGEPs(GEPL, GEPR);
  }

  if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
    return Res;

  if (int Res = cmpTypes(L->getType(), R->getType()))
    return Res;

  // The raw subclass optional data encodes per-instruction flags such as
  // nuw/nsw on arithmetic and exact on divisions, so e.g. "add nsw" and
  // "add nuw" compare unequal here.
  if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
                           R->getRawSubclassOptionalData()))
    return Res;

  // We have two instructions of identical opcode and #operands. Check to see
  // if all operands are the same type.
  for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
    if (int Res =
            cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
      return Res;
  }

  // Check special state that is a part of some instructions.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
    if (int Res = cmpTypes(AI->getAllocatedType(),
                           cast<AllocaInst>(R)->getAllocatedType()))
      return Res;
    return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
  }
  if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
    if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
      return Res;
    if (int Res = cmpNumbers(LI->getSyncScopeID(),
                             cast<LoadInst>(R)->getSyncScopeID()))
      return Res;
    return cmpRangeMetadata(
        LI->getMetadata(LLVMContext::MD_range),
        cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
  }
  if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
    if (int Res =
            cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
      return Res;
    if (int Res =
            cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(SI->getSyncScopeID(),
                      cast<StoreInst>(R)->getSyncScopeID());
  }
  if (const CmpInst *CI = dyn_cast<CmpInst>(L))
    return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
  if (auto CSL = CallSite(const_cast<Instruction *>(L))) {
    auto CSR = CallSite(const_cast<Instruction *>(R));
    if (int Res = cmpNumbers(CSL.getCallingConv(), CSR.getCallingConv()))
      return Res;
    if (int Res = cmpAttrs(CSL.getAttributes(), CSR.getAttributes()))
      return Res;
    if (int Res = cmpOperandBundlesSchema(L, R))
      return Res;
    if (const CallInst *CI = dyn_cast<CallInst>(L))
      if (int Res = cmpNumbers(CI->getTailCallKind(),
                               cast<CallInst>(R)->getTailCallKind()))
        return Res;
    return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
                            R->getMetadata(LLVMContext::MD_range));
  }
  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
    ArrayRef<unsigned> LIndices = IVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
    return 0;
  }
  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
    ArrayRef<unsigned> LIndices = EVI->getIndices();
    ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
    if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
      return Res;
    for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
      if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
        return Res;
    }
    // All indices matched; fall through to the common "return 0" below.
  }
  if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
    if (int Res =
            cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(FI->getSyncScopeID(),
                      cast<FenceInst>(R)->getSyncScopeID());
  }
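  // The atomic instructions below additionally order by their ordering
  // constraints and synchronization scopes, so e.g. a seq_cst and a monotonic
  // cmpxchg never compare equal.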
  if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
    if (int Res = cmpNumbers(CXI->isVolatile(),
                             cast<AtomicCmpXchgInst>(R)->isVolatile()))
      return Res;
    if (int Res =
            cmpNumbers(CXI->isWeak(), cast<AtomicCmpXchgInst>(R)->isWeak()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getSuccessOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
      return Res;
    if (int Res =
            cmpOrderings(CXI->getFailureOrdering(),
                         cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
      return Res;
    return cmpNumbers(CXI->getSyncScopeID(),
                      cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
  }
  if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
    if (int Res = cmpNumbers(RMWI->getOperation(),
                             cast<AtomicRMWInst>(R)->getOperation()))
      return Res;
    if (int Res = cmpNumbers(RMWI->isVolatile(),
                             cast<AtomicRMWInst>(R)->isVolatile()))
      return Res;
    if (int Res = cmpOrderings(RMWI->getOrdering(),
                               cast<AtomicRMWInst>(R)->getOrdering()))
      return Res;
    return cmpNumbers(RMWI->getSyncScopeID(),
                      cast<AtomicRMWInst>(R)->getSyncScopeID());
  }
  if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
    const PHINode *PNR = cast<PHINode>(R);
    // Ensure that in addition to the incoming values being identical
    // (checked by the caller of this function), the incoming blocks
    // are also identical.
    for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
      if (int Res =
              cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
        return Res;
    }
  }
  return 0;
}

// Determine whether two GEP operations perform the same underlying arithmetic.
// Read method declaration comments for more details.
int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
                                const GEPOperator *GEPR) const {
  unsigned int ASL = GEPL->getPointerAddressSpace();
  unsigned int ASR = GEPR->getPointerAddressSpace();

  if (int Res = cmpNumbers(ASL, ASR))
    return Res;

  // When we have target data, we can reduce the GEP down to the value in bytes
  // added to the address.
  const DataLayout &DL = FnL->getParent()->getDataLayout();
  unsigned BitWidth = DL.getPointerSizeInBits(ASL);
  APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
  if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
      GEPR->accumulateConstantOffset(DL, OffsetR))
    return cmpAPInts(OffsetL, OffsetR);
  if (int Res =
          cmpTypes(GEPL->getSourceElementType(), GEPR->getSourceElementType()))
    return Res;

  if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
    return Res;

  for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
    if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
      return Res;
  }

  return 0;
}
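
// When both GEPs have all-constant indices, the comparison above reduces to
// the accumulated byte offset, so two GEPs that walk different struct types
// to the same byte offset still compare equal.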

int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
                                     const InlineAsm *R) const {
  // InlineAsm's are uniqued. If they are the same pointer, obviously they are
  // the same, otherwise compare the fields.
  if (L == R)
    return 0;
  if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
    return Res;
  if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
    return Res;
  if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
    return Res;
  if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
    return Res;
  if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
    return Res;
  if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
    return Res;
  // All the uniquing fields compared equal, yet the pointers differ; since
  // InlineAsm is uniqued, the function types must be distinct objects that
  // cmpTypes merely considers equivalent.
  assert(L->getFunctionType() != R->getFunctionType());
  return 0;
}

/// Compare two values used by the two functions under pair-wise comparison. If
/// this is the first time the values are seen, they're added to the mapping so
/// that we will detect mismatches on next use.
/// See comments in declaration for more details.
int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
  // Catch self-reference case.
  if (L == FnL) {
    if (R == FnR)
      return 0;
    return -1;
  }
  if (R == FnR) {
    if (L == FnL)
      return 0;
    return 1;
  }

  const Constant *ConstL = dyn_cast<Constant>(L);
  const Constant *ConstR = dyn_cast<Constant>(R);
  if (ConstL && ConstR) {
    if (L == R)
      return 0;
    return cmpConstants(ConstL, ConstR);
  }

  if (ConstL)
    return 1;
  if (ConstR)
    return -1;

  const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
  const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);

  if (InlineAsmL && InlineAsmR)
    return cmpInlineAsm(InlineAsmL, InlineAsmR);
  if (InlineAsmL)
    return 1;
  if (InlineAsmR)
    return -1;

  auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
       RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));

  return cmpNumbers(LeftSN.first->second, RightSN.first->second);
}
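
// The serial-number scheme above makes the comparison insensitive to value
// names: when cmpBasicBlocks below compares, say,
//   %sum = add i32 %x, %y    ; in FnL
//   %tmp = add i32 %a, %b    ; in FnR
// the operand pairs (%x, %a) and (%y, %b) already received matching serial
// numbers at their definitions, so the two instructions compare equal despite
// the differing names.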

// Test whether two basic blocks have equivalent behaviour.
int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
                                       const BasicBlock *BBR) const {
  BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
  BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();

  do {
    bool needToCmpOperands = true;
    if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
      return Res;
    if (needToCmpOperands) {
      assert(InstL->getNumOperands() == InstR->getNumOperands());

      for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
        Value *OpL = InstL->getOperand(i);
        Value *OpR = InstR->getOperand(i);
        if (int Res = cmpValues(OpL, OpR))
          return Res;
        // cmpValues should ensure this is true.
        assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
      }
    }

    ++InstL;
    ++InstR;
  } while (InstL != InstLE && InstR != InstRE);

  if (InstL != InstLE && InstR == InstRE)
    return 1;
  if (InstL == InstLE && InstR != InstRE)
    return -1;
  return 0;
}

int FunctionComparator::compareSignature() const {
  if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
    return Res;

  if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
    return Res;

  if (FnL->hasGC()) {
    if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
    return Res;

  if (FnL->hasSection()) {
    if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
      return Res;
  }

  if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
    return Res;

  // TODO: if it's internal and only used in direct calls, we could handle this
  // case too.
  if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
    return Res;

  if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
    return Res;

  assert(FnL->arg_size() == FnR->arg_size() &&
         "Identically typed functions have different numbers of args!");

  // Visit the arguments so that they get enumerated in the order they're
  // passed in.
  for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
                                    ArgRI = FnR->arg_begin(),
                                    ArgLE = FnL->arg_end();
       ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
    if (cmpValues(&*ArgLI, &*ArgRI) != 0)
      llvm_unreachable("Arguments repeat!");
  }
  return 0;
}

// Test whether the two functions have equivalent behaviour.
int FunctionComparator::compare() {
  beginCompare();

  if (int Res = compareSignature())
    return Res;

  // We do a CFG-ordered walk since the actual ordering of the blocks in the
  // linked list is immaterial. Our walk starts at the entry block for both
  // functions, then takes each block from each terminator in order. As an
  // artifact, this also means that unreachable blocks are ignored.
  SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
  SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of FnL.

  FnLBBs.push_back(&FnL->getEntryBlock());
  FnRBBs.push_back(&FnR->getEntryBlock());

  VisitedBBs.insert(FnLBBs[0]);
  while (!FnLBBs.empty()) {
    const BasicBlock *BBL = FnLBBs.pop_back_val();
    const BasicBlock *BBR = FnRBBs.pop_back_val();

    if (int Res = cmpValues(BBL, BBR))
      return Res;

    if (int Res = cmpBasicBlocks(BBL, BBR))
      return Res;

    const Instruction *TermL = BBL->getTerminator();
    const Instruction *TermR = BBR->getTerminator();

    assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
    for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
        continue;

      FnLBBs.push_back(TermL->getSuccessor(i));
      FnRBBs.push_back(TermR->getSuccessor(i));
    }
  }
  return 0;
}

namespace {

// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
// hash of a sequence of 64-bit ints, but the entire input does not need to be
// available at once. This interface is necessary for functionHash because it
// needs to accumulate the hash as the structure of the function is traversed
// without saving these values to an intermediate buffer. This form of hashing
// is not often needed, as usually the object to hash is just read from a
// buffer.
class HashAccumulator64 {
  uint64_t Hash;

public:
  // Initialize to random constant, so the state isn't zero.
  HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }

  void add(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }

  // No finishing is required, because the entire hash value is used.
  uint64_t getHash() { return Hash; }
};

} // end anonymous namespace
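
// Callers such as the MergeFunctions pass can bucket candidate functions by
// functionHash() first: functions whose hashes differ can never compare equal,
// so the full (and far more expensive) compare() walk only needs to run within
// a bucket.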

// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy compare() uses to compare functions by walking the BBs in depth
// first order and comparing each instruction in sequence. Because this hash
// does not look at the operands, it is insensitive to things such as the
// target of calls and the constants used in the function, which makes it
// useful when possibly merging functions which are the same modulo constants
// and call targets.
FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
  HashAccumulator64 H;
  H.add(F.isVarArg());
  H.add(F.arg_size());

  SmallVector<const BasicBlock *, 8> BBs;
  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

  // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
  // accumulating the hash of the function "structure." (BB and opcode sequence)
  BBs.push_back(&F.getEntryBlock());
  VisitedBBs.insert(BBs[0]);
  while (!BBs.empty()) {
    const BasicBlock *BB = BBs.pop_back_val();
    // This random value acts as a block header, as otherwise the partition of
    // opcodes into BBs wouldn't affect the hash, only the order of the
    // opcodes.
    H.add(45798);
    for (auto &Inst : *BB) {
      H.add(Inst.getOpcode());
    }
    const Instruction *Term = BB->getTerminator();
    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
      if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
        continue;
      BBs.push_back(Term->getSuccessor(i));
    }
  }
  return H.getHash();
}