1 //===- FunctionComparator.h - Function Comparator -------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the FunctionComparator and GlobalNumberState classes
11 // which are used by the MergeFunctions pass for comparing functions.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Utils/FunctionComparator.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/IR/CallSite.h"
18 #include "llvm/IR/InlineAsm.h"
19 #include "llvm/IR/Instructions.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/raw_ostream.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "functioncomparator"
27 
28 int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
29   if (L < R) return -1;
30   if (L > R) return 1;
31   return 0;
32 }
33 
34 int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
35   if ((int)L < (int)R) return -1;
36   if ((int)L > (int)R) return 1;
37   return 0;
38 }
39 
40 int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
41   if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
42     return Res;
43   if (L.ugt(R)) return 1;
44   if (R.ugt(L)) return -1;
45   return 0;
46 }
47 
48 int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
49   // Floats are ordered first by semantics (i.e. float, double, half, etc.),
50   // then by value interpreted as a bitstring (aka APInt).
51   const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
52   if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
53                            APFloat::semanticsPrecision(SR)))
54     return Res;
55   if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
56                            APFloat::semanticsMaxExponent(SR)))
57     return Res;
58   if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
59                            APFloat::semanticsMinExponent(SR)))
60     return Res;
61   if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
62                            APFloat::semanticsSizeInBits(SR)))
63     return Res;
64   return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
65 }
66 
67 int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
68   // Prevent heavy comparison, compare sizes first.
69   if (int Res = cmpNumbers(L.size(), R.size()))
70     return Res;
71 
72   // Compare strings lexicographically only when it is necessary: only when
73   // strings are equal in size.
74   return L.compare(R);
75 }
76 
77 int FunctionComparator::cmpAttrs(const AttributeList L,
78                                  const AttributeList R) const {
79   if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets()))
80     return Res;
81 
82   for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) {
83     AttributeSet LAS = L.getAttributes(i);
84     AttributeSet RAS = R.getAttributes(i);
85     AttributeSet::iterator LI = LAS.begin(), LE = LAS.end();
86     AttributeSet::iterator RI = RAS.begin(), RE = RAS.end();
87     for (; LI != LE && RI != RE; ++LI, ++RI) {
88       Attribute LA = *LI;
89       Attribute RA = *RI;
90       if (LA < RA)
91         return -1;
92       if (RA < LA)
93         return 1;
94     }
95     if (LI != LE)
96       return 1;
97     if (RI != RE)
98       return -1;
99   }
100   return 0;
101 }
102 
103 int FunctionComparator::cmpRangeMetadata(const MDNode *L,
104                                          const MDNode *R) const {
105   if (L == R)
106     return 0;
107   if (!L)
108     return -1;
109   if (!R)
110     return 1;
111   // Range metadata is a sequence of numbers. Make sure they are the same
112   // sequence.
113   // TODO: Note that as this is metadata, it is possible to drop and/or merge
114   // this data when considering functions to merge. Thus this comparison would
115   // return 0 (i.e. equivalent), but merging would become more complicated
116   // because the ranges would need to be unioned. It is not likely that
117   // functions differ ONLY in this metadata if they are actually the same
118   // function semantically.
119   if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
120     return Res;
121   for (size_t I = 0; I < L->getNumOperands(); ++I) {
122     ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
123     ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
124     if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
125       return Res;
126   }
127   return 0;
128 }
129 
130 int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
131                                                 const Instruction *R) const {
132   ImmutableCallSite LCS(L);
133   ImmutableCallSite RCS(R);
134 
135   assert(LCS && RCS && "Must be calls or invokes!");
136   assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
137 
138   if (int Res =
139           cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
140     return Res;
141 
142   for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
143     auto OBL = LCS.getOperandBundleAt(i);
144     auto OBR = RCS.getOperandBundleAt(i);
145 
146     if (int Res = OBL.getTagName().compare(OBR.getTagName()))
147       return Res;
148 
149     if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
150       return Res;
151   }
152 
153   return 0;
154 }
155 
156 /// Constants comparison:
157 /// 1. Check whether type of L constant could be losslessly bitcasted to R
158 /// type.
159 /// 2. Compare constant contents.
160 /// For more details see declaration comments.
161 int FunctionComparator::cmpConstants(const Constant *L,
162                                      const Constant *R) const {
163 
164   Type *TyL = L->getType();
165   Type *TyR = R->getType();
166 
167   // Check whether types are bitcastable. This part is just re-factored
168   // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
169   // we also pack into result which type is "less" for us.
170   int TypesRes = cmpTypes(TyL, TyR);
171   if (TypesRes != 0) {
172     // Types are different, but check whether we can bitcast them.
173     if (!TyL->isFirstClassType()) {
174       if (TyR->isFirstClassType())
175         return -1;
176       // Neither TyL nor TyR are values of first class type. Return the result
177       // of comparing the types
178       return TypesRes;
179     }
180     if (!TyR->isFirstClassType()) {
181       if (TyL->isFirstClassType())
182         return 1;
183       return TypesRes;
184     }
185 
186     // Vector -> Vector conversions are always lossless if the two vector types
187     // have the same size, otherwise not.
188     unsigned TyLWidth = 0;
189     unsigned TyRWidth = 0;
190 
191     if (auto *VecTyL = dyn_cast<VectorType>(TyL))
192       TyLWidth = VecTyL->getBitWidth();
193     if (auto *VecTyR = dyn_cast<VectorType>(TyR))
194       TyRWidth = VecTyR->getBitWidth();
195 
196     if (TyLWidth != TyRWidth)
197       return cmpNumbers(TyLWidth, TyRWidth);
198 
199     // Zero bit-width means neither TyL nor TyR are vectors.
200     if (!TyLWidth) {
201       PointerType *PTyL = dyn_cast<PointerType>(TyL);
202       PointerType *PTyR = dyn_cast<PointerType>(TyR);
203       if (PTyL && PTyR) {
204         unsigned AddrSpaceL = PTyL->getAddressSpace();
205         unsigned AddrSpaceR = PTyR->getAddressSpace();
206         if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
207           return Res;
208       }
209       if (PTyL)
210         return 1;
211       if (PTyR)
212         return -1;
213 
214       // TyL and TyR aren't vectors, nor pointers. We don't know how to
215       // bitcast them.
216       return TypesRes;
217     }
218   }
219 
220   // OK, types are bitcastable, now check constant contents.
221 
222   if (L->isNullValue() && R->isNullValue())
223     return TypesRes;
224   if (L->isNullValue() && !R->isNullValue())
225     return 1;
226   if (!L->isNullValue() && R->isNullValue())
227     return -1;
228 
229   auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
230   auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
231   if (GlobalValueL && GlobalValueR) {
232     return cmpGlobalValues(GlobalValueL, GlobalValueR);
233   }
234 
235   if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
236     return Res;
237 
238   if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
239     const auto *SeqR = cast<ConstantDataSequential>(R);
240     // This handles ConstantDataArray and ConstantDataVector. Note that we
241     // compare the two raw data arrays, which might differ depending on the host
242     // endianness. This isn't a problem though, because the endiness of a module
243     // will affect the order of the constants, but this order is the same
244     // for a given input module and host platform.
245     return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
246   }
247 
248   switch (L->getValueID()) {
249   case Value::UndefValueVal:
250   case Value::ConstantTokenNoneVal:
251     return TypesRes;
252   case Value::ConstantIntVal: {
253     const APInt &LInt = cast<ConstantInt>(L)->getValue();
254     const APInt &RInt = cast<ConstantInt>(R)->getValue();
255     return cmpAPInts(LInt, RInt);
256   }
257   case Value::ConstantFPVal: {
258     const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
259     const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
260     return cmpAPFloats(LAPF, RAPF);
261   }
262   case Value::ConstantArrayVal: {
263     const ConstantArray *LA = cast<ConstantArray>(L);
264     const ConstantArray *RA = cast<ConstantArray>(R);
265     uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
266     uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
267     if (int Res = cmpNumbers(NumElementsL, NumElementsR))
268       return Res;
269     for (uint64_t i = 0; i < NumElementsL; ++i) {
270       if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
271                                  cast<Constant>(RA->getOperand(i))))
272         return Res;
273     }
274     return 0;
275   }
276   case Value::ConstantStructVal: {
277     const ConstantStruct *LS = cast<ConstantStruct>(L);
278     const ConstantStruct *RS = cast<ConstantStruct>(R);
279     unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
280     unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
281     if (int Res = cmpNumbers(NumElementsL, NumElementsR))
282       return Res;
283     for (unsigned i = 0; i != NumElementsL; ++i) {
284       if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
285                                  cast<Constant>(RS->getOperand(i))))
286         return Res;
287     }
288     return 0;
289   }
290   case Value::ConstantVectorVal: {
291     const ConstantVector *LV = cast<ConstantVector>(L);
292     const ConstantVector *RV = cast<ConstantVector>(R);
293     unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
294     unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
295     if (int Res = cmpNumbers(NumElementsL, NumElementsR))
296       return Res;
297     for (uint64_t i = 0; i < NumElementsL; ++i) {
298       if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
299                                  cast<Constant>(RV->getOperand(i))))
300         return Res;
301     }
302     return 0;
303   }
304   case Value::ConstantExprVal: {
305     const ConstantExpr *LE = cast<ConstantExpr>(L);
306     const ConstantExpr *RE = cast<ConstantExpr>(R);
307     unsigned NumOperandsL = LE->getNumOperands();
308     unsigned NumOperandsR = RE->getNumOperands();
309     if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
310       return Res;
311     for (unsigned i = 0; i < NumOperandsL; ++i) {
312       if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
313                                  cast<Constant>(RE->getOperand(i))))
314         return Res;
315     }
316     return 0;
317   }
318   case Value::BlockAddressVal: {
319     const BlockAddress *LBA = cast<BlockAddress>(L);
320     const BlockAddress *RBA = cast<BlockAddress>(R);
321     if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
322       return Res;
323     if (LBA->getFunction() == RBA->getFunction()) {
324       // They are BBs in the same function. Order by which comes first in the
325       // BB order of the function. This order is deterministic.
326       Function* F = LBA->getFunction();
327       BasicBlock *LBB = LBA->getBasicBlock();
328       BasicBlock *RBB = RBA->getBasicBlock();
329       if (LBB == RBB)
330         return 0;
331       for(BasicBlock &BB : F->getBasicBlockList()) {
332         if (&BB == LBB) {
333           assert(&BB != RBB);
334           return -1;
335         }
336         if (&BB == RBB)
337           return 1;
338       }
339       llvm_unreachable("Basic Block Address does not point to a basic block in "
340                        "its function.");
341       return -1;
342     } else {
343       // cmpValues said the functions are the same. So because they aren't
344       // literally the same pointer, they must respectively be the left and
345       // right functions.
346       assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
347       // cmpValues will tell us if these are equivalent BasicBlocks, in the
348       // context of their respective functions.
349       return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
350     }
351   }
352   default: // Unknown constant, abort.
353     DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
354     llvm_unreachable("Constant ValueID not recognized.");
355     return -1;
356   }
357 }
358 
359 int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
360   uint64_t LNumber = GlobalNumbers->getNumber(L);
361   uint64_t RNumber = GlobalNumbers->getNumber(R);
362   return cmpNumbers(LNumber, RNumber);
363 }
364 
365 /// cmpType - compares two types,
366 /// defines total ordering among the types set.
367 /// See method declaration comments for more details.
368 int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
369   PointerType *PTyL = dyn_cast<PointerType>(TyL);
370   PointerType *PTyR = dyn_cast<PointerType>(TyR);
371 
372   const DataLayout &DL = FnL->getParent()->getDataLayout();
373   if (PTyL && PTyL->getAddressSpace() == 0)
374     TyL = DL.getIntPtrType(TyL);
375   if (PTyR && PTyR->getAddressSpace() == 0)
376     TyR = DL.getIntPtrType(TyR);
377 
378   if (TyL == TyR)
379     return 0;
380 
381   if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
382     return Res;
383 
384   switch (TyL->getTypeID()) {
385   default:
386     llvm_unreachable("Unknown type!");
387     // Fall through in Release mode.
388     LLVM_FALLTHROUGH;
389   case Type::IntegerTyID:
390     return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
391                       cast<IntegerType>(TyR)->getBitWidth());
392   // TyL == TyR would have returned true earlier, because types are uniqued.
393   case Type::VoidTyID:
394   case Type::FloatTyID:
395   case Type::DoubleTyID:
396   case Type::X86_FP80TyID:
397   case Type::FP128TyID:
398   case Type::PPC_FP128TyID:
399   case Type::LabelTyID:
400   case Type::MetadataTyID:
401   case Type::TokenTyID:
402     return 0;
403 
404   case Type::PointerTyID: {
405     assert(PTyL && PTyR && "Both types must be pointers here.");
406     return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
407   }
408 
409   case Type::StructTyID: {
410     StructType *STyL = cast<StructType>(TyL);
411     StructType *STyR = cast<StructType>(TyR);
412     if (STyL->getNumElements() != STyR->getNumElements())
413       return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
414 
415     if (STyL->isPacked() != STyR->isPacked())
416       return cmpNumbers(STyL->isPacked(), STyR->isPacked());
417 
418     for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
419       if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
420         return Res;
421     }
422     return 0;
423   }
424 
425   case Type::FunctionTyID: {
426     FunctionType *FTyL = cast<FunctionType>(TyL);
427     FunctionType *FTyR = cast<FunctionType>(TyR);
428     if (FTyL->getNumParams() != FTyR->getNumParams())
429       return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
430 
431     if (FTyL->isVarArg() != FTyR->isVarArg())
432       return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
433 
434     if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
435       return Res;
436 
437     for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
438       if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
439         return Res;
440     }
441     return 0;
442   }
443 
444   case Type::ArrayTyID:
445   case Type::VectorTyID: {
446     auto *STyL = cast<SequentialType>(TyL);
447     auto *STyR = cast<SequentialType>(TyR);
448     if (STyL->getNumElements() != STyR->getNumElements())
449       return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
450     return cmpTypes(STyL->getElementType(), STyR->getElementType());
451   }
452   }
453 }
454 
455 // Determine whether the two operations are the same except that pointer-to-A
456 // and pointer-to-B are equivalent. This should be kept in sync with
457 // Instruction::isSameOperationAs.
458 // Read method declaration comments for more details.
459 int FunctionComparator::cmpOperations(const Instruction *L,
460                                       const Instruction *R,
461                                       bool &needToCmpOperands) const {
462   needToCmpOperands = true;
463   if (int Res = cmpValues(L, R))
464     return Res;
465 
466   // Differences from Instruction::isSameOperationAs:
467   //  * replace type comparison with calls to cmpTypes.
468   //  * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
469   //  * because of the above, we don't test for the tail bit on calls later on.
470   if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
471     return Res;
472 
473   if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
474     needToCmpOperands = false;
475     const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
476     if (int Res =
477             cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
478       return Res;
479     return cmpGEPs(GEPL, GEPR);
480   }
481 
482   if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
483     return Res;
484 
485   if (int Res = cmpTypes(L->getType(), R->getType()))
486     return Res;
487 
488   if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
489                            R->getRawSubclassOptionalData()))
490     return Res;
491 
492   // We have two instructions of identical opcode and #operands.  Check to see
493   // if all operands are the same type
494   for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
495     if (int Res =
496             cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
497       return Res;
498   }
499 
500   // Check special state that is a part of some instructions.
501   if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
502     if (int Res = cmpTypes(AI->getAllocatedType(),
503                            cast<AllocaInst>(R)->getAllocatedType()))
504       return Res;
505     return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
506   }
507   if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
508     if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
509       return Res;
510     if (int Res =
511             cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
512       return Res;
513     if (int Res =
514             cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
515       return Res;
516     if (int Res = cmpNumbers(LI->getSyncScopeID(),
517                              cast<LoadInst>(R)->getSyncScopeID()))
518       return Res;
519     return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
520         cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
521   }
522   if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
523     if (int Res =
524             cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
525       return Res;
526     if (int Res =
527             cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
528       return Res;
529     if (int Res =
530             cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
531       return Res;
532     return cmpNumbers(SI->getSyncScopeID(),
533                       cast<StoreInst>(R)->getSyncScopeID());
534   }
535   if (const CmpInst *CI = dyn_cast<CmpInst>(L))
536     return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
537   if (const CallInst *CI = dyn_cast<CallInst>(L)) {
538     if (int Res = cmpNumbers(CI->getCallingConv(),
539                              cast<CallInst>(R)->getCallingConv()))
540       return Res;
541     if (int Res =
542             cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
543       return Res;
544     if (int Res = cmpOperandBundlesSchema(CI, R))
545       return Res;
546     return cmpRangeMetadata(
547         CI->getMetadata(LLVMContext::MD_range),
548         cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
549   }
550   if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
551     if (int Res = cmpNumbers(II->getCallingConv(),
552                              cast<InvokeInst>(R)->getCallingConv()))
553       return Res;
554     if (int Res =
555             cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
556       return Res;
557     if (int Res = cmpOperandBundlesSchema(II, R))
558       return Res;
559     return cmpRangeMetadata(
560         II->getMetadata(LLVMContext::MD_range),
561         cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
562   }
563   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
564     ArrayRef<unsigned> LIndices = IVI->getIndices();
565     ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
566     if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
567       return Res;
568     for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
569       if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
570         return Res;
571     }
572     return 0;
573   }
574   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
575     ArrayRef<unsigned> LIndices = EVI->getIndices();
576     ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
577     if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
578       return Res;
579     for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
580       if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
581         return Res;
582     }
583   }
584   if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
585     if (int Res =
586             cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
587       return Res;
588     return cmpNumbers(FI->getSyncScopeID(),
589                       cast<FenceInst>(R)->getSyncScopeID());
590   }
591   if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
592     if (int Res = cmpNumbers(CXI->isVolatile(),
593                              cast<AtomicCmpXchgInst>(R)->isVolatile()))
594       return Res;
595     if (int Res = cmpNumbers(CXI->isWeak(),
596                              cast<AtomicCmpXchgInst>(R)->isWeak()))
597       return Res;
598     if (int Res =
599             cmpOrderings(CXI->getSuccessOrdering(),
600                          cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
601       return Res;
602     if (int Res =
603             cmpOrderings(CXI->getFailureOrdering(),
604                          cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
605       return Res;
606     return cmpNumbers(CXI->getSyncScopeID(),
607                       cast<AtomicCmpXchgInst>(R)->getSyncScopeID());
608   }
609   if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
610     if (int Res = cmpNumbers(RMWI->getOperation(),
611                              cast<AtomicRMWInst>(R)->getOperation()))
612       return Res;
613     if (int Res = cmpNumbers(RMWI->isVolatile(),
614                              cast<AtomicRMWInst>(R)->isVolatile()))
615       return Res;
616     if (int Res = cmpOrderings(RMWI->getOrdering(),
617                              cast<AtomicRMWInst>(R)->getOrdering()))
618       return Res;
619     return cmpNumbers(RMWI->getSyncScopeID(),
620                       cast<AtomicRMWInst>(R)->getSyncScopeID());
621   }
622   if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
623     const PHINode *PNR = cast<PHINode>(R);
624     // Ensure that in addition to the incoming values being identical
625     // (checked by the caller of this function), the incoming blocks
626     // are also identical.
627     for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
628       if (int Res =
629               cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
630         return Res;
631     }
632   }
633   return 0;
634 }
635 
636 // Determine whether two GEP operations perform the same underlying arithmetic.
637 // Read method declaration comments for more details.
638 int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
639                                 const GEPOperator *GEPR) const {
640 
641   unsigned int ASL = GEPL->getPointerAddressSpace();
642   unsigned int ASR = GEPR->getPointerAddressSpace();
643 
644   if (int Res = cmpNumbers(ASL, ASR))
645     return Res;
646 
647   // When we have target data, we can reduce the GEP down to the value in bytes
648   // added to the address.
649   const DataLayout &DL = FnL->getParent()->getDataLayout();
650   unsigned BitWidth = DL.getPointerSizeInBits(ASL);
651   APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
652   if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
653       GEPR->accumulateConstantOffset(DL, OffsetR))
654     return cmpAPInts(OffsetL, OffsetR);
655   if (int Res = cmpTypes(GEPL->getSourceElementType(),
656                          GEPR->getSourceElementType()))
657     return Res;
658 
659   if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
660     return Res;
661 
662   for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
663     if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
664       return Res;
665   }
666 
667   return 0;
668 }
669 
670 int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
671                                      const InlineAsm *R) const {
672   // InlineAsm's are uniqued. If they are the same pointer, obviously they are
673   // the same, otherwise compare the fields.
674   if (L == R)
675     return 0;
676   if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
677     return Res;
678   if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
679     return Res;
680   if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
681     return Res;
682   if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
683     return Res;
684   if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
685     return Res;
686   if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
687     return Res;
688   llvm_unreachable("InlineAsm blocks were not uniqued.");
689   return 0;
690 }
691 
692 /// Compare two values used by the two functions under pair-wise comparison. If
693 /// this is the first time the values are seen, they're added to the mapping so
694 /// that we will detect mismatches on next use.
695 /// See comments in declaration for more details.
696 int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
697   // Catch self-reference case.
698   if (L == FnL) {
699     if (R == FnR)
700       return 0;
701     return -1;
702   }
703   if (R == FnR) {
704     if (L == FnL)
705       return 0;
706     return 1;
707   }
708 
709   const Constant *ConstL = dyn_cast<Constant>(L);
710   const Constant *ConstR = dyn_cast<Constant>(R);
711   if (ConstL && ConstR) {
712     if (L == R)
713       return 0;
714     return cmpConstants(ConstL, ConstR);
715   }
716 
717   if (ConstL)
718     return 1;
719   if (ConstR)
720     return -1;
721 
722   const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
723   const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
724 
725   if (InlineAsmL && InlineAsmR)
726     return cmpInlineAsm(InlineAsmL, InlineAsmR);
727   if (InlineAsmL)
728     return 1;
729   if (InlineAsmR)
730     return -1;
731 
732   auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
733        RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
734 
735   return cmpNumbers(LeftSN.first->second, RightSN.first->second);
736 }
737 
738 // Test whether two basic blocks have equivalent behaviour.
739 int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
740                                        const BasicBlock *BBR) const {
741   BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
742   BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
743 
744   do {
745     bool needToCmpOperands = true;
746     if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
747       return Res;
748     if (needToCmpOperands) {
749       assert(InstL->getNumOperands() == InstR->getNumOperands());
750 
751       for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
752         Value *OpL = InstL->getOperand(i);
753         Value *OpR = InstR->getOperand(i);
754         if (int Res = cmpValues(OpL, OpR))
755           return Res;
756         // cmpValues should ensure this is true.
757         assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
758       }
759     }
760 
761     ++InstL;
762     ++InstR;
763   } while (InstL != InstLE && InstR != InstRE);
764 
765   if (InstL != InstLE && InstR == InstRE)
766     return 1;
767   if (InstL == InstLE && InstR != InstRE)
768     return -1;
769   return 0;
770 }
771 
772 int FunctionComparator::compareSignature() const {
773   if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
774     return Res;
775 
776   if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
777     return Res;
778 
779   if (FnL->hasGC()) {
780     if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
781       return Res;
782   }
783 
784   if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
785     return Res;
786 
787   if (FnL->hasSection()) {
788     if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
789       return Res;
790   }
791 
792   if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
793     return Res;
794 
795   // TODO: if it's internal and only used in direct calls, we could handle this
796   // case too.
797   if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
798     return Res;
799 
800   if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
801     return Res;
802 
803   assert(FnL->arg_size() == FnR->arg_size() &&
804          "Identically typed functions have different numbers of args!");
805 
806   // Visit the arguments so that they get enumerated in the order they're
807   // passed in.
808   for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
809        ArgRI = FnR->arg_begin(),
810        ArgLE = FnL->arg_end();
811        ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
812     if (cmpValues(&*ArgLI, &*ArgRI) != 0)
813       llvm_unreachable("Arguments repeat!");
814   }
815   return 0;
816 }
817 
818 // Test whether the two functions have equivalent behaviour.
819 int FunctionComparator::compare() {
820   beginCompare();
821 
822   if (int Res = compareSignature())
823     return Res;
824 
825   // We do a CFG-ordered walk since the actual ordering of the blocks in the
826   // linked list is immaterial. Our walk starts at the entry block for both
827   // functions, then takes each block from each terminator in order. As an
828   // artifact, this also means that unreachable blocks are ignored.
829   SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
830   SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
831 
832   FnLBBs.push_back(&FnL->getEntryBlock());
833   FnRBBs.push_back(&FnR->getEntryBlock());
834 
835   VisitedBBs.insert(FnLBBs[0]);
836   while (!FnLBBs.empty()) {
837     const BasicBlock *BBL = FnLBBs.pop_back_val();
838     const BasicBlock *BBR = FnRBBs.pop_back_val();
839 
840     if (int Res = cmpValues(BBL, BBR))
841       return Res;
842 
843     if (int Res = cmpBasicBlocks(BBL, BBR))
844       return Res;
845 
846     const TerminatorInst *TermL = BBL->getTerminator();
847     const TerminatorInst *TermR = BBR->getTerminator();
848 
849     assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
850     for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
851       if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
852         continue;
853 
854       FnLBBs.push_back(TermL->getSuccessor(i));
855       FnRBBs.push_back(TermR->getSuccessor(i));
856     }
857   }
858   return 0;
859 }
860 
861 namespace {
862 
863 // Accumulate the hash of a sequence of 64-bit integers. This is similar to a
864 // hash of a sequence of 64bit ints, but the entire input does not need to be
865 // available at once. This interface is necessary for functionHash because it
866 // needs to accumulate the hash as the structure of the function is traversed
867 // without saving these values to an intermediate buffer. This form of hashing
868 // is not often needed, as usually the object to hash is just read from a
869 // buffer.
870 class HashAccumulator64 {
871   uint64_t Hash;
872 public:
873   // Initialize to random constant, so the state isn't zero.
874   HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
875   void add(uint64_t V) {
876      Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
877   }
878   // No finishing is required, because the entire hash value is used.
879   uint64_t getHash() { return Hash; }
880 };
881 } // end anonymous namespace
882 
883 // A function hash is calculated by considering only the number of arguments and
884 // whether a function is varargs, the order of basic blocks (given by the
885 // successors of each basic block in depth first order), and the order of
886 // opcodes of each instruction within each of these basic blocks. This mirrors
887 // the strategy compare() uses to compare functions by walking the BBs in depth
888 // first order and comparing each instruction in sequence. Because this hash
889 // does not look at the operands, it is insensitive to things such as the
890 // target of calls and the constants used in the function, which makes it useful
891 // when possibly merging functions which are the same modulo constants and call
892 // targets.
893 FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
894   HashAccumulator64 H;
895   H.add(F.isVarArg());
896   H.add(F.arg_size());
897 
898   SmallVector<const BasicBlock *, 8> BBs;
899   SmallSet<const BasicBlock *, 16> VisitedBBs;
900 
901   // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
902   // accumulating the hash of the function "structure." (BB and opcode sequence)
903   BBs.push_back(&F.getEntryBlock());
904   VisitedBBs.insert(BBs[0]);
905   while (!BBs.empty()) {
906     const BasicBlock *BB = BBs.pop_back_val();
907     // This random value acts as a block header, as otherwise the partition of
908     // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
909     H.add(45798);
910     for (auto &Inst : *BB) {
911       H.add(Inst.getOpcode());
912     }
913     const TerminatorInst *Term = BB->getTerminator();
914     for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
915       if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
916         continue;
917       BBs.push_back(Term->getSuccessor(i));
918     }
919   }
920   return H.getHash();
921 }
922 
923 
924