1 //===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the IRTranslator class.
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
13 #include "llvm/ADT/PostOrderIterator.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/ScopeExit.h"
16 #include "llvm/ADT/SmallSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/Analysis/BranchProbabilityInfo.h"
19 #include "llvm/Analysis/Loads.h"
20 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/CodeGen/Analysis.h"
23 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
24 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
25 #include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
26 #include "llvm/CodeGen/LowLevelType.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineMemOperand.h"
32 #include "llvm/CodeGen/MachineModuleInfo.h"
33 #include "llvm/CodeGen/MachineOperand.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/StackProtector.h"
36 #include "llvm/CodeGen/SwitchLoweringUtils.h"
37 #include "llvm/CodeGen/TargetFrameLowering.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetLowering.h"
40 #include "llvm/CodeGen/TargetPassConfig.h"
41 #include "llvm/CodeGen/TargetRegisterInfo.h"
42 #include "llvm/CodeGen/TargetSubtargetInfo.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CFG.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DebugInfo.h"
49 #include "llvm/IR/DerivedTypes.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/GetElementPtrTypeIterator.h"
52 #include "llvm/IR/InlineAsm.h"
53 #include "llvm/IR/InstrTypes.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/LLVMContext.h"
58 #include "llvm/IR/Metadata.h"
59 #include "llvm/IR/PatternMatch.h"
60 #include "llvm/IR/Type.h"
61 #include "llvm/IR/User.h"
62 #include "llvm/IR/Value.h"
63 #include "llvm/InitializePasses.h"
64 #include "llvm/MC/MCContext.h"
65 #include "llvm/Pass.h"
66 #include "llvm/Support/Casting.h"
67 #include "llvm/Support/CodeGen.h"
68 #include "llvm/Support/Debug.h"
69 #include "llvm/Support/ErrorHandling.h"
70 #include "llvm/Support/LowLevelTypeImpl.h"
71 #include "llvm/Support/MathExtras.h"
72 #include "llvm/Support/raw_ostream.h"
73 #include "llvm/Target/TargetIntrinsicInfo.h"
74 #include "llvm/Target/TargetMachine.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstddef>
78 #include <cstdint>
79 #include <iterator>
80 #include <string>
81 #include <utility>
82 #include <vector>
83 
84 #define DEBUG_TYPE "irtranslator"
85 
86 using namespace llvm;
87 
88 static cl::opt<bool>
89     EnableCSEInIRTranslator("enable-cse-in-irtranslator",
90                             cl::desc("Should enable CSE in irtranslator"),
91                             cl::Optional, cl::init(false));
92 char IRTranslator::ID = 0;
93 
94 INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
95                 false, false)
96 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
97 INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
98 INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
99                 false, false)
100 
101 static void reportTranslationError(MachineFunction &MF,
102                                    const TargetPassConfig &TPC,
103                                    OptimizationRemarkEmitter &ORE,
104                                    OptimizationRemarkMissed &R) {
105   MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
106 
107   // Print the function name explicitly if we don't have a debug location (which
108   // makes the diagnostic less useful) or if we're going to emit a raw error.
109   if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled())
110     R << (" (in function: " + MF.getName() + ")").str();
111 
112   if (TPC.isGlobalISelAbortEnabled())
113     report_fatal_error(R.getMsg());
114   else
115     ORE.emit(R);
116 }
117 
118 IRTranslator::IRTranslator(CodeGenOpt::Level optlevel)
119     : MachineFunctionPass(ID), OptLevel(optlevel) {}
120 
121 #ifndef NDEBUG
122 namespace {
123 /// Verify that every instruction created has the same DILocation as the
124 /// instruction being translated.
125 class DILocationVerifier : public GISelChangeObserver {
126   const Instruction *CurrInst = nullptr;
127 
128 public:
129   DILocationVerifier() = default;
130   ~DILocationVerifier() = default;
131 
132   const Instruction *getCurrentInst() const { return CurrInst; }
133   void setCurrentInst(const Instruction *Inst) { CurrInst = Inst; }
134 
135   void erasingInstr(MachineInstr &MI) override {}
136   void changingInstr(MachineInstr &MI) override {}
137   void changedInstr(MachineInstr &MI) override {}
138 
139   void createdInstr(MachineInstr &MI) override {
140     assert(getCurrentInst() && "Inserted instruction without a current MI");
141 
142     // Only print the check message if we're actually checking it.
143 #ifndef NDEBUG
144     LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
145                       << " was copied to " << MI);
146 #endif
147     // We allow insts in the entry block to have a debug loc line of 0 because
148     // they could have originated from constants, and we don't want a jumpy
149     // debug experience.
150     assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
151             MI.getDebugLoc().getLine() == 0) &&
152            "Line info was not transferred to all instructions");
153   }
154 };
155 } // namespace
156 #endif // ifndef NDEBUG
157 
158 
159 void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
160   AU.addRequired<StackProtector>();
161   AU.addRequired<TargetPassConfig>();
162   AU.addRequired<GISelCSEAnalysisWrapperPass>();
163   if (OptLevel != CodeGenOpt::None)
164     AU.addRequired<BranchProbabilityInfoWrapperPass>();
165   getSelectionDAGFallbackAnalysisUsage(AU);
166   MachineFunctionPass::getAnalysisUsage(AU);
167 }
168 
169 IRTranslator::ValueToVRegInfo::VRegListT &
170 IRTranslator::allocateVRegs(const Value &Val) {
171   assert(!VMap.contains(Val) && "Value already allocated in VMap");
172   auto *Regs = VMap.getVRegs(Val);
173   auto *Offsets = VMap.getOffsets(Val);
174   SmallVector<LLT, 4> SplitTys;
175   computeValueLLTs(*DL, *Val.getType(), SplitTys,
176                    Offsets->empty() ? Offsets : nullptr);
177   for (unsigned i = 0; i < SplitTys.size(); ++i)
178     Regs->push_back(0);
179   return *Regs;
180 }
181 
182 ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
183   auto VRegsIt = VMap.findVRegs(Val);
184   if (VRegsIt != VMap.vregs_end())
185     return *VRegsIt->second;
186 
187   if (Val.getType()->isVoidTy())
188     return *VMap.getVRegs(Val);
189 
190   // Create entry for this type.
191   auto *VRegs = VMap.getVRegs(Val);
192   auto *Offsets = VMap.getOffsets(Val);
193 
194   assert(Val.getType()->isSized() &&
195          "Don't know how to create an empty vreg");
196 
197   SmallVector<LLT, 4> SplitTys;
198   computeValueLLTs(*DL, *Val.getType(), SplitTys,
199                    Offsets->empty() ? Offsets : nullptr);
200 
201   if (!isa<Constant>(Val)) {
202     for (auto Ty : SplitTys)
203       VRegs->push_back(MRI->createGenericVirtualRegister(Ty));
204     return *VRegs;
205   }
206 
207   if (Val.getType()->isAggregateType()) {
208     // UndefValue, ConstantAggregateZero
209     auto &C = cast<Constant>(Val);
210     unsigned Idx = 0;
211     while (auto Elt = C.getAggregateElement(Idx++)) {
212       auto EltRegs = getOrCreateVRegs(*Elt);
213       llvm::copy(EltRegs, std::back_inserter(*VRegs));
214     }
215   } else {
216     assert(SplitTys.size() == 1 && "unexpectedly split LLT");
217     VRegs->push_back(MRI->createGenericVirtualRegister(SplitTys[0]));
218     bool Success = translate(cast<Constant>(Val), VRegs->front());
219     if (!Success) {
220       OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
221                                  MF->getFunction().getSubprogram(),
222                                  &MF->getFunction().getEntryBlock());
223       R << "unable to translate constant: " << ore::NV("Type", Val.getType());
224       reportTranslationError(*MF, *TPC, *ORE, R);
225       return *VRegs;
226     }
227   }
228 
229   return *VRegs;
230 }
231 
232 int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
233   auto MapEntry = FrameIndices.find(&AI);
234   if (MapEntry != FrameIndices.end())
235     return MapEntry->second;
236 
237   uint64_t ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
238   uint64_t Size =
239       ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
240 
241   // Always allocate at least one byte.
242   Size = std::max<uint64_t>(Size, 1u);
243 
244   int &FI = FrameIndices[&AI];
245   FI = MF->getFrameInfo().CreateStackObject(Size, AI.getAlign(), false, &AI);
246   return FI;
247 }
248 
249 Align IRTranslator::getMemOpAlign(const Instruction &I) {
250   if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
251     return SI->getAlign();
252   if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
253     return LI->getAlign();
254   }
255   if (const AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
256     // TODO(PR27168): This instruction has no alignment attribute, but unlike
257     // the default alignment for load/store, the default here is to assume
258     // it has NATURAL alignment, not DataLayout-specified alignment.
259     const DataLayout &DL = AI->getModule()->getDataLayout();
260     return Align(DL.getTypeStoreSize(AI->getCompareOperand()->getType()));
261   }
262   if (const AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
263     // TODO(PR27168): This instruction has no alignment attribute, but unlike
264     // the default alignment for load/store, the default here is to assume
265     // it has NATURAL alignment, not DataLayout-specified alignment.
266     const DataLayout &DL = AI->getModule()->getDataLayout();
267     return Align(DL.getTypeStoreSize(AI->getValOperand()->getType()));
268   }
269   OptimizationRemarkMissed R("gisel-irtranslator", "", &I);
270   R << "unable to translate memop: " << ore::NV("Opcode", &I);
271   reportTranslationError(*MF, *TPC, *ORE, R);
272   return Align(1);
273 }
274 
275 MachineBasicBlock &IRTranslator::getMBB(const BasicBlock &BB) {
276   MachineBasicBlock *&MBB = BBToMBB[&BB];
277   assert(MBB && "BasicBlock was not encountered before");
278   return *MBB;
279 }
280 
281 void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
282   assert(NewPred && "new predecessor must be a real MachineBasicBlock");
283   MachinePreds[Edge].push_back(NewPred);
284 }
285 
286 bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
287                                      MachineIRBuilder &MIRBuilder) {
288   // Get or create a virtual register for each value.
289   // Unless the value is a Constant => loadimm cst?
290   // or inline constant each time?
291   // Creation of a virtual register needs to have a size.
292   Register Op0 = getOrCreateVReg(*U.getOperand(0));
293   Register Op1 = getOrCreateVReg(*U.getOperand(1));
294   Register Res = getOrCreateVReg(U);
295   uint16_t Flags = 0;
296   if (isa<Instruction>(U)) {
297     const Instruction &I = cast<Instruction>(U);
298     Flags = MachineInstr::copyFlagsFromInstruction(I);
299   }
300 
301   MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
302   return true;
303 }
304 
305 bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
306                                     MachineIRBuilder &MIRBuilder) {
307   Register Op0 = getOrCreateVReg(*U.getOperand(0));
308   Register Res = getOrCreateVReg(U);
309   uint16_t Flags = 0;
310   if (isa<Instruction>(U)) {
311     const Instruction &I = cast<Instruction>(U);
312     Flags = MachineInstr::copyFlagsFromInstruction(I);
313   }
314   MIRBuilder.buildInstr(Opcode, {Res}, {Op0}, Flags);
315   return true;
316 }
317 
318 bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
319   return translateUnaryOp(TargetOpcode::G_FNEG, U, MIRBuilder);
320 }
321 
322 bool IRTranslator::translateCompare(const User &U,
323                                     MachineIRBuilder &MIRBuilder) {
324   auto *CI = dyn_cast<CmpInst>(&U);
325   Register Op0 = getOrCreateVReg(*U.getOperand(0));
326   Register Op1 = getOrCreateVReg(*U.getOperand(1));
327   Register Res = getOrCreateVReg(U);
328   CmpInst::Predicate Pred =
329       CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
330                                     cast<ConstantExpr>(U).getPredicate());
331   if (CmpInst::isIntPredicate(Pred))
332     MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
333   else if (Pred == CmpInst::FCMP_FALSE)
334     MIRBuilder.buildCopy(
335         Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
336   else if (Pred == CmpInst::FCMP_TRUE)
337     MIRBuilder.buildCopy(
338         Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
339   else {
340     assert(CI && "Instruction should be CmpInst");
341     MIRBuilder.buildFCmp(Pred, Res, Op0, Op1,
342                          MachineInstr::copyFlagsFromInstruction(*CI));
343   }
344 
345   return true;
346 }
347 
348 bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
349   const ReturnInst &RI = cast<ReturnInst>(U);
350   const Value *Ret = RI.getReturnValue();
351   if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
352     Ret = nullptr;
353 
354   ArrayRef<Register> VRegs;
355   if (Ret)
356     VRegs = getOrCreateVRegs(*Ret);
357 
358   Register SwiftErrorVReg = 0;
359   if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
360     SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
361         &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
362   }
363 
364   // The target may mess up with the insertion point, but
365   // this is not important as a return is the last instruction
366   // of the block anyway.
367   return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
368 }
369 
370 void IRTranslator::emitBranchForMergedCondition(
371     const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
372     MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
373     BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
374   // If the leaf of the tree is a comparison, merge the condition into
375   // the caseblock.
376   if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
377     CmpInst::Predicate Condition;
378     if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
379       Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
380     } else {
381       const FCmpInst *FC = cast<FCmpInst>(Cond);
382       Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
383     }
384 
385     SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
386                            BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
387                            CurBuilder->getDebugLoc(), TProb, FProb);
388     SL->SwitchCases.push_back(CB);
389     return;
390   }
391 
392   // Create a CaseBlock record representing this branch.
393   CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
394   SwitchCG::CaseBlock CB(
395       Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
396       nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
397   SL->SwitchCases.push_back(CB);
398 }
399 
400 static bool isValInBlock(const Value *V, const BasicBlock *BB) {
401   if (const Instruction *I = dyn_cast<Instruction>(V))
402     return I->getParent() == BB;
403   return true;
404 }
405 
406 void IRTranslator::findMergedConditions(
407     const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
408     MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
409     Instruction::BinaryOps Opc, BranchProbability TProb,
410     BranchProbability FProb, bool InvertCond) {
411   using namespace PatternMatch;
412   assert((Opc == Instruction::And || Opc == Instruction::Or) &&
413          "Expected Opc to be AND/OR");
414   // Skip over not part of the tree and remember to invert op and operands at
415   // next level.
416   Value *NotCond;
417   if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
418       isValInBlock(NotCond, CurBB->getBasicBlock())) {
419     findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
420                          !InvertCond);
421     return;
422   }
423 
424   const Instruction *BOp = dyn_cast<Instruction>(Cond);
425   // Compute the effective opcode for Cond, taking into account whether it needs
426   // to be inverted, e.g.
427   //   and (not (or A, B)), C
428   // gets lowered as
429   //   and (and (not A, not B), C)
430   unsigned BOpc = 0;
431   if (BOp) {
432     BOpc = BOp->getOpcode();
433     if (InvertCond) {
434       if (BOpc == Instruction::And)
435         BOpc = Instruction::Or;
436       else if (BOpc == Instruction::Or)
437         BOpc = Instruction::And;
438     }
439   }
440 
441   // If this node is not part of the or/and tree, emit it as a branch.
442   if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
443       BOpc != static_cast<unsigned>(Opc) || !BOp->hasOneUse() ||
444       BOp->getParent() != CurBB->getBasicBlock() ||
445       !isValInBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
446       !isValInBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
447     emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
448                                  InvertCond);
449     return;
450   }
451 
452   //  Create TmpBB after CurBB.
453   MachineFunction::iterator BBI(CurBB);
454   MachineBasicBlock *TmpBB =
455       MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
456   CurBB->getParent()->insert(++BBI, TmpBB);
457 
458   if (Opc == Instruction::Or) {
459     // Codegen X | Y as:
460     // BB1:
461     //   jmp_if_X TBB
462     //   jmp TmpBB
463     // TmpBB:
464     //   jmp_if_Y TBB
465     //   jmp FBB
466     //
467 
468     // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
469     // The requirement is that
470     //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
471     //     = TrueProb for original BB.
472     // Assuming the original probabilities are A and B, one choice is to set
473     // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
474     // A/(1+B) and 2B/(1+B). This choice assumes that
475     //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
476     // Another choice is to assume TrueProb for BB1 equals to TrueProb for
477     // TmpBB, but the math is more complicated.
478 
479     auto NewTrueProb = TProb / 2;
480     auto NewFalseProb = TProb / 2 + FProb;
481     // Emit the LHS condition.
482     findMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
483                          NewTrueProb, NewFalseProb, InvertCond);
484 
485     // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
486     SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
487     BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
488     // Emit the RHS condition into TmpBB.
489     findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
490                          Probs[0], Probs[1], InvertCond);
491   } else {
492     assert(Opc == Instruction::And && "Unknown merge op!");
493     // Codegen X & Y as:
494     // BB1:
495     //   jmp_if_X TmpBB
496     //   jmp FBB
497     // TmpBB:
498     //   jmp_if_Y TBB
499     //   jmp FBB
500     //
501     //  This requires creation of TmpBB after CurBB.
502 
503     // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
504     // The requirement is that
505     //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
506     //     = FalseProb for original BB.
507     // Assuming the original probabilities are A and B, one choice is to set
508     // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
509     // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
510     // TrueProb for BB1 * FalseProb for TmpBB.
511 
512     auto NewTrueProb = TProb + FProb / 2;
513     auto NewFalseProb = FProb / 2;
514     // Emit the LHS condition.
515     findMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
516                          NewTrueProb, NewFalseProb, InvertCond);
517 
518     // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
519     SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
520     BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
521     // Emit the RHS condition into TmpBB.
522     findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
523                          Probs[0], Probs[1], InvertCond);
524   }
525 }
526 
527 bool IRTranslator::shouldEmitAsBranches(
528     const std::vector<SwitchCG::CaseBlock> &Cases) {
529   // For multiple cases, it's better to emit as branches.
530   if (Cases.size() != 2)
531     return true;
532 
533   // If this is two comparisons of the same values or'd or and'd together, they
534   // will get folded into a single comparison, so don't emit two blocks.
535   if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
536        Cases[0].CmpRHS == Cases[1].CmpRHS) ||
537       (Cases[0].CmpRHS == Cases[1].CmpLHS &&
538        Cases[0].CmpLHS == Cases[1].CmpRHS)) {
539     return false;
540   }
541 
542   // Handle: (X != null) | (Y != null) --> (X|Y) != 0
543   // Handle: (X == null) & (Y == null) --> (X|Y) == 0
544   if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
545       Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
546       isa<Constant>(Cases[0].CmpRHS) &&
547       cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
548     if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
549         Cases[0].TrueBB == Cases[1].ThisBB)
550       return false;
551     if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
552         Cases[0].FalseBB == Cases[1].ThisBB)
553       return false;
554   }
555 
556   return true;
557 }
558 
559 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
560   const BranchInst &BrInst = cast<BranchInst>(U);
561   auto &CurMBB = MIRBuilder.getMBB();
562   auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
563 
564   if (BrInst.isUnconditional()) {
565     // If the unconditional target is the layout successor, fallthrough.
566     if (!CurMBB.isLayoutSuccessor(Succ0MBB))
567       MIRBuilder.buildBr(*Succ0MBB);
568 
569     // Link successors.
570     for (const BasicBlock *Succ : successors(&BrInst))
571       CurMBB.addSuccessor(&getMBB(*Succ));
572     return true;
573   }
574 
575   // If this condition is one of the special cases we handle, do special stuff
576   // now.
577   const Value *CondVal = BrInst.getCondition();
578   MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
579 
580   const auto &TLI = *MF->getSubtarget().getTargetLowering();
581 
582   // If this is a series of conditions that are or'd or and'd together, emit
583   // this as a sequence of branches instead of setcc's with and/or operations.
584   // As long as jumps are not expensive (exceptions for multi-use logic ops,
585   // unpredictable branches, and vector extracts because those jumps are likely
586   // expensive for any target), this should improve performance.
587   // For example, instead of something like:
588   //     cmp A, B
589   //     C = seteq
590   //     cmp D, E
591   //     F = setle
592   //     or C, F
593   //     jnz foo
594   // Emit:
595   //     cmp A, B
596   //     je foo
597   //     cmp D, E
598   //     jle foo
599   using namespace PatternMatch;
600   if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
601     Instruction::BinaryOps Opcode = BOp->getOpcode();
602     Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
603     if (!TLI.isJumpExpensive() && BOp->hasOneUse() &&
604         !BrInst.hasMetadata(LLVMContext::MD_unpredictable) &&
605         (Opcode == Instruction::And || Opcode == Instruction::Or) &&
606         !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
607           match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
608       findMergedConditions(BOp, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
609                            getEdgeProbability(&CurMBB, Succ0MBB),
610                            getEdgeProbability(&CurMBB, Succ1MBB),
611                            /*InvertCond=*/false);
612       assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");
613 
614       // Allow some cases to be rejected.
615       if (shouldEmitAsBranches(SL->SwitchCases)) {
616         // Emit the branch for this block.
617         emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
618         SL->SwitchCases.erase(SL->SwitchCases.begin());
619         return true;
620       }
621 
622       // Okay, we decided not to do this, remove any inserted MBB's and clear
623       // SwitchCases.
624       for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
625         MF->erase(SL->SwitchCases[I].ThisBB);
626 
627       SL->SwitchCases.clear();
628     }
629   }
630 
631   // Create a CaseBlock record representing this branch.
632   SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
633                          ConstantInt::getTrue(MF->getFunction().getContext()),
634                          nullptr, Succ0MBB, Succ1MBB, &CurMBB,
635                          CurBuilder->getDebugLoc());
636 
637   // Use emitSwitchCase to actually insert the fast branch sequence for this
638   // cond branch.
639   emitSwitchCase(CB, &CurMBB, *CurBuilder);
640   return true;
641 }
642 
643 void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
644                                         MachineBasicBlock *Dst,
645                                         BranchProbability Prob) {
646   if (!FuncInfo.BPI) {
647     Src->addSuccessorWithoutProb(Dst);
648     return;
649   }
650   if (Prob.isUnknown())
651     Prob = getEdgeProbability(Src, Dst);
652   Src->addSuccessor(Dst, Prob);
653 }
654 
655 BranchProbability
656 IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
657                                  const MachineBasicBlock *Dst) const {
658   const BasicBlock *SrcBB = Src->getBasicBlock();
659   const BasicBlock *DstBB = Dst->getBasicBlock();
660   if (!FuncInfo.BPI) {
661     // If BPI is not available, set the default probability as 1 / N, where N is
662     // the number of successors.
663     auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
664     return BranchProbability(1, SuccSize);
665   }
666   return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
667 }
668 
669 bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
670   using namespace SwitchCG;
671   // Extract cases from the switch.
672   const SwitchInst &SI = cast<SwitchInst>(U);
673   BranchProbabilityInfo *BPI = FuncInfo.BPI;
674   CaseClusterVector Clusters;
675   Clusters.reserve(SI.getNumCases());
676   for (auto &I : SI.cases()) {
677     MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
678     assert(Succ && "Could not find successor mbb in mapping");
679     const ConstantInt *CaseVal = I.getCaseValue();
680     BranchProbability Prob =
681         BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
682             : BranchProbability(1, SI.getNumCases() + 1);
683     Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
684   }
685 
686   MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
687 
688   // Cluster adjacent cases with the same destination. We do this at all
689   // optimization levels because it's cheap to do and will make codegen faster
690   // if there are many clusters.
691   sortAndRangeify(Clusters);
692 
693   MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
694 
695   // If there is only the default destination, jump there directly.
696   if (Clusters.empty()) {
697     SwitchMBB->addSuccessor(DefaultMBB);
698     if (DefaultMBB != SwitchMBB->getNextNode())
699       MIB.buildBr(*DefaultMBB);
700     return true;
701   }
702 
703   SL->findJumpTables(Clusters, &SI, DefaultMBB, nullptr, nullptr);
704   SL->findBitTestClusters(Clusters, &SI);
705 
706   LLVM_DEBUG({
707     dbgs() << "Case clusters: ";
708     for (const CaseCluster &C : Clusters) {
709       if (C.Kind == CC_JumpTable)
710         dbgs() << "JT:";
711       if (C.Kind == CC_BitTests)
712         dbgs() << "BT:";
713 
714       C.Low->getValue().print(dbgs(), true);
715       if (C.Low != C.High) {
716         dbgs() << '-';
717         C.High->getValue().print(dbgs(), true);
718       }
719       dbgs() << ' ';
720     }
721     dbgs() << '\n';
722   });
723 
724   assert(!Clusters.empty());
725   SwitchWorkList WorkList;
726   CaseClusterIt First = Clusters.begin();
727   CaseClusterIt Last = Clusters.end() - 1;
728   auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
729   WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
730 
731   // FIXME: At the moment we don't do any splitting optimizations here like
732   // SelectionDAG does, so this worklist only has one entry.
733   while (!WorkList.empty()) {
734     SwitchWorkListItem W = WorkList.back();
735     WorkList.pop_back();
736     if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
737       return false;
738   }
739   return true;
740 }
741 
742 void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
743                                  MachineBasicBlock *MBB) {
744   // Emit the code for the jump table
745   assert(JT.Reg != -1U && "Should lower JT Header first!");
746   MachineIRBuilder MIB(*MBB->getParent());
747   MIB.setMBB(*MBB);
748   MIB.setDebugLoc(CurBuilder->getDebugLoc());
749 
750   Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
751   const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
752 
753   auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
754   MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
755 }
756 
757 bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
758                                        SwitchCG::JumpTableHeader &JTH,
759                                        MachineBasicBlock *HeaderBB) {
760   MachineIRBuilder MIB(*HeaderBB->getParent());
761   MIB.setMBB(*HeaderBB);
762   MIB.setDebugLoc(CurBuilder->getDebugLoc());
763 
764   const Value &SValue = *JTH.SValue;
765   // Subtract the lowest switch case value from the value being switched on.
766   const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
767   Register SwitchOpReg = getOrCreateVReg(SValue);
768   auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
769   auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
770 
771   // This value may be smaller or larger than the target's pointer type, and
772   // therefore require extension or truncating.
773   Type *PtrIRTy = SValue.getType()->getPointerTo();
774   const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
775   Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
776 
777   JT.Reg = Sub.getReg(0);
778 
779   if (JTH.OmitRangeCheck) {
780     if (JT.MBB != HeaderBB->getNextNode())
781       MIB.buildBr(*JT.MBB);
782     return true;
783   }
784 
785   // Emit the range check for the jump table, and branch to the default block
786   // for the switch statement if the value being switched on exceeds the
787   // largest case in the switch.
788   auto Cst = getOrCreateVReg(
789       *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
790   Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
791   auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
792 
793   auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
794 
795   // Avoid emitting unnecessary branches to the next block.
796   if (JT.MBB != HeaderBB->getNextNode())
797     BrCond = MIB.buildBr(*JT.MBB);
798   return true;
799 }
800 
801 void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
802                                   MachineBasicBlock *SwitchBB,
803                                   MachineIRBuilder &MIB) {
804   Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
805   Register Cond;
806   DebugLoc OldDbgLoc = MIB.getDebugLoc();
807   MIB.setDebugLoc(CB.DbgLoc);
808   MIB.setMBB(*CB.ThisBB);
809 
810   if (CB.PredInfo.NoCmp) {
811     // Branch or fall through to TrueBB.
812     addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
813     addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
814                       CB.ThisBB);
815     CB.ThisBB->normalizeSuccProbs();
816     if (CB.TrueBB != CB.ThisBB->getNextNode())
817       MIB.buildBr(*CB.TrueBB);
818     MIB.setDebugLoc(OldDbgLoc);
819     return;
820   }
821 
822   const LLT i1Ty = LLT::scalar(1);
823   // Build the compare.
824   if (!CB.CmpMHS) {
825     const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
826     // For conditional branch lowering, we might try to do something silly like
827     // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
828     // just re-use the existing condition vreg.
829     if (CI && CI->getZExtValue() == 1 &&
830         MRI->getType(CondLHS).getSizeInBits() == 1 &&
831         CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
832       Cond = CondLHS;
833     } else {
834       Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
835       if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
836         Cond =
837             MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
838       else
839         Cond =
840             MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
841     }
842   } else {
843     assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
844            "Can only handle SLE ranges");
845 
846     const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
847     const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
848 
849     Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
850     if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
851       Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
852       Cond =
853           MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
854     } else {
855       const LLT CmpTy = MRI->getType(CmpOpReg);
856       auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
857       auto Diff = MIB.buildConstant(CmpTy, High - Low);
858       Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
859     }
860   }
861 
862   // Update successor info
863   addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
864 
865   addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
866                     CB.ThisBB);
867 
868   // TrueBB and FalseBB are always different unless the incoming IR is
869   // degenerate. This only happens when running llc on weird IR.
870   if (CB.TrueBB != CB.FalseBB)
871     addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
872   CB.ThisBB->normalizeSuccProbs();
873 
874   addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
875                     CB.ThisBB);
876 
877   MIB.buildBrCond(Cond, *CB.TrueBB);
878   MIB.buildBr(*CB.FalseBB);
879   MIB.setDebugLoc(OldDbgLoc);
880 }
881 
882 bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
883                                           MachineBasicBlock *SwitchMBB,
884                                           MachineBasicBlock *CurMBB,
885                                           MachineBasicBlock *DefaultMBB,
886                                           MachineIRBuilder &MIB,
887                                           MachineFunction::iterator BBI,
888                                           BranchProbability UnhandledProbs,
889                                           SwitchCG::CaseClusterIt I,
890                                           MachineBasicBlock *Fallthrough,
891                                           bool FallthroughUnreachable) {
892   using namespace SwitchCG;
893   MachineFunction *CurMF = SwitchMBB->getParent();
894   // FIXME: Optimize away range check based on pivot comparisons.
895   JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
896   SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
897   BranchProbability DefaultProb = W.DefaultProb;
898 
899   // The jump block hasn't been inserted yet; insert it here.
900   MachineBasicBlock *JumpMBB = JT->MBB;
901   CurMF->insert(BBI, JumpMBB);
902 
903   // Since the jump table block is separate from the switch block, we need
904   // to keep track of it as a machine predecessor to the default block,
905   // otherwise we lose the phi edges.
906   addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
907                     CurMBB);
908   addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
909                     JumpMBB);
910 
911   auto JumpProb = I->Prob;
912   auto FallthroughProb = UnhandledProbs;
913 
914   // If the default statement is a target of the jump table, we evenly
915   // distribute the default probability to successors of CurMBB. Also
916   // update the probability on the edge from JumpMBB to Fallthrough.
917   for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
918                                         SE = JumpMBB->succ_end();
919        SI != SE; ++SI) {
920     if (*SI == DefaultMBB) {
921       JumpProb += DefaultProb / 2;
922       FallthroughProb -= DefaultProb / 2;
923       JumpMBB->setSuccProbability(SI, DefaultProb / 2);
924       JumpMBB->normalizeSuccProbs();
925     } else {
926       // Also record edges from the jump table block to it's successors.
927       addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
928                         JumpMBB);
929     }
930   }
931 
932   // Skip the range check if the fallthrough block is unreachable.
933   if (FallthroughUnreachable)
934     JTH->OmitRangeCheck = true;
935 
936   if (!JTH->OmitRangeCheck)
937     addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
938   addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
939   CurMBB->normalizeSuccProbs();
940 
941   // The jump table header will be inserted in our current block, do the
942   // range check, and fall through to our fallthrough block.
943   JTH->HeaderBB = CurMBB;
944   JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
945 
946   // If we're in the right place, emit the jump table header right now.
947   if (CurMBB == SwitchMBB) {
948     if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
949       return false;
950     JTH->Emitted = true;
951   }
952   return true;
953 }
954 bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
955                                             Value *Cond,
956                                             MachineBasicBlock *Fallthrough,
957                                             bool FallthroughUnreachable,
958                                             BranchProbability UnhandledProbs,
959                                             MachineBasicBlock *CurMBB,
960                                             MachineIRBuilder &MIB,
961                                             MachineBasicBlock *SwitchMBB) {
962   using namespace SwitchCG;
963   const Value *RHS, *LHS, *MHS;
964   CmpInst::Predicate Pred;
965   if (I->Low == I->High) {
966     // Check Cond == I->Low.
967     Pred = CmpInst::ICMP_EQ;
968     LHS = Cond;
969     RHS = I->Low;
970     MHS = nullptr;
971   } else {
972     // Check I->Low <= Cond <= I->High.
973     Pred = CmpInst::ICMP_SLE;
974     LHS = I->Low;
975     MHS = Cond;
976     RHS = I->High;
977   }
978 
979   // If Fallthrough is unreachable, fold away the comparison.
980   // The false probability is the sum of all unhandled cases.
981   CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
982                CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
983 
984   emitSwitchCase(CB, SwitchMBB, MIB);
985   return true;
986 }
987 
988 void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
989                                      MachineBasicBlock *SwitchBB) {
990   MachineIRBuilder &MIB = *CurBuilder;
991   MIB.setMBB(*SwitchBB);
992 
993   // Subtract the minimum value.
994   Register SwitchOpReg = getOrCreateVReg(*B.SValue);
995 
996   LLT SwitchOpTy = MRI->getType(SwitchOpReg);
997   Register MinValReg = MIB.buildConstant(SwitchOpTy, B.First).getReg(0);
998   auto RangeSub = MIB.buildSub(SwitchOpTy, SwitchOpReg, MinValReg);
999 
1000   // Ensure that the type will fit the mask value.
1001   LLT MaskTy = SwitchOpTy;
1002   for (unsigned I = 0, E = B.Cases.size(); I != E; ++I) {
1003     if (!isUIntN(SwitchOpTy.getSizeInBits(), B.Cases[I].Mask)) {
1004       // Switch table case range are encoded into series of masks.
1005       // Just use pointer type, it's guaranteed to fit.
1006       MaskTy = LLT::scalar(64);
1007       break;
1008     }
1009   }
1010   Register SubReg = RangeSub.getReg(0);
1011   if (SwitchOpTy != MaskTy)
1012     SubReg = MIB.buildZExtOrTrunc(MaskTy, SubReg).getReg(0);
1013 
1014   B.RegVT = getMVTForLLT(MaskTy);
1015   B.Reg = SubReg;
1016 
1017   MachineBasicBlock *MBB = B.Cases[0].ThisBB;
1018 
1019   if (!B.OmitRangeCheck)
1020     addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
1021   addSuccessorWithProb(SwitchBB, MBB, B.Prob);
1022 
1023   SwitchBB->normalizeSuccProbs();
1024 
1025   if (!B.OmitRangeCheck) {
1026     // Conditional branch to the default block.
1027     auto RangeCst = MIB.buildConstant(SwitchOpTy, B.Range);
1028     auto RangeCmp = MIB.buildICmp(CmpInst::Predicate::ICMP_UGT, LLT::scalar(1),
1029                                   RangeSub, RangeCst);
1030     MIB.buildBrCond(RangeCmp, *B.Default);
1031   }
1032 
1033   // Avoid emitting unnecessary branches to the next block.
1034   if (MBB != SwitchBB->getNextNode())
1035     MIB.buildBr(*MBB);
1036 }
1037 
1038 void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
1039                                    MachineBasicBlock *NextMBB,
1040                                    BranchProbability BranchProbToNext,
1041                                    Register Reg, SwitchCG::BitTestCase &B,
1042                                    MachineBasicBlock *SwitchBB) {
1043   MachineIRBuilder &MIB = *CurBuilder;
1044   MIB.setMBB(*SwitchBB);
1045 
1046   LLT SwitchTy = getLLTForMVT(BB.RegVT);
1047   Register Cmp;
1048   unsigned PopCount = countPopulation(B.Mask);
1049   if (PopCount == 1) {
1050     // Testing for a single bit; just compare the shift count with what it
1051     // would need to be to shift a 1 bit in that position.
1052     auto MaskTrailingZeros =
1053         MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
1054     Cmp =
1055         MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
1056             .getReg(0);
1057   } else if (PopCount == BB.Range) {
1058     // There is only one zero bit in the range, test for it directly.
1059     auto MaskTrailingOnes =
1060         MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
1061     Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
1062               .getReg(0);
1063   } else {
1064     // Make desired shift.
1065     auto CstOne = MIB.buildConstant(SwitchTy, 1);
1066     auto SwitchVal = MIB.buildShl(SwitchTy, CstOne, Reg);
1067 
1068     // Emit bit tests and jumps.
1069     auto CstMask = MIB.buildConstant(SwitchTy, B.Mask);
1070     auto AndOp = MIB.buildAnd(SwitchTy, SwitchVal, CstMask);
1071     auto CstZero = MIB.buildConstant(SwitchTy, 0);
1072     Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), AndOp, CstZero)
1073               .getReg(0);
1074   }
1075 
1076   // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
1077   addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
1078   // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
1079   addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
1080   // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
1081   // one as they are relative probabilities (and thus work more like weights),
1082   // and hence we need to normalize them to let the sum of them become one.
1083   SwitchBB->normalizeSuccProbs();
1084 
1085   // Record the fact that the IR edge from the header to the bit test target
1086   // will go through our new block. Neeeded for PHIs to have nodes added.
1087   addMachineCFGPred({BB.Parent->getBasicBlock(), B.TargetBB->getBasicBlock()},
1088                     SwitchBB);
1089 
1090   MIB.buildBrCond(Cmp, *B.TargetBB);
1091 
1092   // Avoid emitting unnecessary branches to the next block.
1093   if (NextMBB != SwitchBB->getNextNode())
1094     MIB.buildBr(*NextMBB);
1095 }
1096 
1097 bool IRTranslator::lowerBitTestWorkItem(
1098     SwitchCG::SwitchWorkListItem W, MachineBasicBlock *SwitchMBB,
1099     MachineBasicBlock *CurMBB, MachineBasicBlock *DefaultMBB,
1100     MachineIRBuilder &MIB, MachineFunction::iterator BBI,
1101     BranchProbability DefaultProb, BranchProbability UnhandledProbs,
1102     SwitchCG::CaseClusterIt I, MachineBasicBlock *Fallthrough,
1103     bool FallthroughUnreachable) {
1104   using namespace SwitchCG;
1105   MachineFunction *CurMF = SwitchMBB->getParent();
1106   // FIXME: Optimize away range check based on pivot comparisons.
1107   BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
1108   // The bit test blocks haven't been inserted yet; insert them here.
1109   for (BitTestCase &BTC : BTB->Cases)
1110     CurMF->insert(BBI, BTC.ThisBB);
1111 
1112   // Fill in fields of the BitTestBlock.
1113   BTB->Parent = CurMBB;
1114   BTB->Default = Fallthrough;
1115 
1116   BTB->DefaultProb = UnhandledProbs;
1117   // If the cases in bit test don't form a contiguous range, we evenly
1118   // distribute the probability on the edge to Fallthrough to two
1119   // successors of CurMBB.
1120   if (!BTB->ContiguousRange) {
1121     BTB->Prob += DefaultProb / 2;
1122     BTB->DefaultProb -= DefaultProb / 2;
1123   }
1124 
1125   if (FallthroughUnreachable) {
1126     // Skip the range check if the fallthrough block is unreachable.
1127     BTB->OmitRangeCheck = true;
1128   }
1129 
1130   // If we're in the right place, emit the bit test header right now.
1131   if (CurMBB == SwitchMBB) {
1132     emitBitTestHeader(*BTB, SwitchMBB);
1133     BTB->Emitted = true;
1134   }
1135   return true;
1136 }
1137 
1138 bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
1139                                        Value *Cond,
1140                                        MachineBasicBlock *SwitchMBB,
1141                                        MachineBasicBlock *DefaultMBB,
1142                                        MachineIRBuilder &MIB) {
1143   using namespace SwitchCG;
1144   MachineFunction *CurMF = FuncInfo.MF;
1145   MachineBasicBlock *NextMBB = nullptr;
1146   MachineFunction::iterator BBI(W.MBB);
1147   if (++BBI != FuncInfo.MF->end())
1148     NextMBB = &*BBI;
1149 
1150   if (EnableOpts) {
1151     // Here, we order cases by probability so the most likely case will be
1152     // checked first. However, two clusters can have the same probability in
1153     // which case their relative ordering is non-deterministic. So we use Low
1154     // as a tie-breaker as clusters are guaranteed to never overlap.
1155     llvm::sort(W.FirstCluster, W.LastCluster + 1,
1156                [](const CaseCluster &a, const CaseCluster &b) {
1157                  return a.Prob != b.Prob
1158                             ? a.Prob > b.Prob
1159                             : a.Low->getValue().slt(b.Low->getValue());
1160                });
1161 
1162     // Rearrange the case blocks so that the last one falls through if possible
1163     // without changing the order of probabilities.
1164     for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
1165       --I;
1166       if (I->Prob > W.LastCluster->Prob)
1167         break;
1168       if (I->Kind == CC_Range && I->MBB == NextMBB) {
1169         std::swap(*I, *W.LastCluster);
1170         break;
1171       }
1172     }
1173   }
1174 
1175   // Compute total probability.
1176   BranchProbability DefaultProb = W.DefaultProb;
1177   BranchProbability UnhandledProbs = DefaultProb;
1178   for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
1179     UnhandledProbs += I->Prob;
1180 
1181   MachineBasicBlock *CurMBB = W.MBB;
1182   for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
1183     bool FallthroughUnreachable = false;
1184     MachineBasicBlock *Fallthrough;
1185     if (I == W.LastCluster) {
1186       // For the last cluster, fall through to the default destination.
1187       Fallthrough = DefaultMBB;
1188       FallthroughUnreachable = isa<UnreachableInst>(
1189           DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
1190     } else {
1191       Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
1192       CurMF->insert(BBI, Fallthrough);
1193     }
1194     UnhandledProbs -= I->Prob;
1195 
1196     switch (I->Kind) {
1197     case CC_BitTests: {
1198       if (!lowerBitTestWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
1199                                 DefaultProb, UnhandledProbs, I, Fallthrough,
1200                                 FallthroughUnreachable)) {
1201         LLVM_DEBUG(dbgs() << "Failed to lower bit test for switch");
1202         return false;
1203       }
1204       break;
1205     }
1206 
1207     case CC_JumpTable: {
1208       if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
1209                                   UnhandledProbs, I, Fallthrough,
1210                                   FallthroughUnreachable)) {
1211         LLVM_DEBUG(dbgs() << "Failed to lower jump table");
1212         return false;
1213       }
1214       break;
1215     }
1216     case CC_Range: {
1217       if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
1218                                     FallthroughUnreachable, UnhandledProbs,
1219                                     CurMBB, MIB, SwitchMBB)) {
1220         LLVM_DEBUG(dbgs() << "Failed to lower switch range");
1221         return false;
1222       }
1223       break;
1224     }
1225     }
1226     CurMBB = Fallthrough;
1227   }
1228 
1229   return true;
1230 }
1231 
1232 bool IRTranslator::translateIndirectBr(const User &U,
1233                                        MachineIRBuilder &MIRBuilder) {
1234   const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
1235 
1236   const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
1237   MIRBuilder.buildBrIndirect(Tgt);
1238 
1239   // Link successors.
1240   SmallPtrSet<const BasicBlock *, 32> AddedSuccessors;
1241   MachineBasicBlock &CurBB = MIRBuilder.getMBB();
1242   for (const BasicBlock *Succ : successors(&BrInst)) {
1243     // It's legal for indirectbr instructions to have duplicate blocks in the
1244     // destination list. We don't allow this in MIR. Skip anything that's
1245     // already a successor.
1246     if (!AddedSuccessors.insert(Succ).second)
1247       continue;
1248     CurBB.addSuccessor(&getMBB(*Succ));
1249   }
1250 
1251   return true;
1252 }
1253 
1254 static bool isSwiftError(const Value *V) {
1255   if (auto Arg = dyn_cast<Argument>(V))
1256     return Arg->hasSwiftErrorAttr();
1257   if (auto AI = dyn_cast<AllocaInst>(V))
1258     return AI->isSwiftError();
1259   return false;
1260 }
1261 
1262 bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
1263   const LoadInst &LI = cast<LoadInst>(U);
1264   if (DL->getTypeStoreSize(LI.getType()) == 0)
1265     return true;
1266 
1267   ArrayRef<Register> Regs = getOrCreateVRegs(LI);
1268   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
1269   Register Base = getOrCreateVReg(*LI.getPointerOperand());
1270 
1271   Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
1272   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
1273 
1274   if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
1275     assert(Regs.size() == 1 && "swifterror should be single pointer");
1276     Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
1277                                                     LI.getPointerOperand());
1278     MIRBuilder.buildCopy(Regs[0], VReg);
1279     return true;
1280   }
1281 
1282   auto &TLI = *MF->getSubtarget().getTargetLowering();
1283   MachineMemOperand::Flags Flags = TLI.getLoadMemOperandFlags(LI, *DL);
1284 
1285   const MDNode *Ranges =
1286       Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
1287   for (unsigned i = 0; i < Regs.size(); ++i) {
1288     Register Addr;
1289     MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
1290 
1291     MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
1292     Align BaseAlign = getMemOpAlign(LI);
1293     AAMDNodes AAMetadata;
1294     LI.getAAMetadata(AAMetadata);
1295     auto MMO = MF->getMachineMemOperand(
1296         Ptr, Flags, MRI->getType(Regs[i]).getSizeInBytes(),
1297         commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, Ranges,
1298         LI.getSyncScopeID(), LI.getOrdering());
1299     MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
1300   }
1301 
1302   return true;
1303 }
1304 
1305 bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
1306   const StoreInst &SI = cast<StoreInst>(U);
1307   if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
1308     return true;
1309 
1310   ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
1311   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
1312   Register Base = getOrCreateVReg(*SI.getPointerOperand());
1313 
1314   Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
1315   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
1316 
1317   if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
1318     assert(Vals.size() == 1 && "swifterror should be single pointer");
1319 
1320     Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
1321                                                     SI.getPointerOperand());
1322     MIRBuilder.buildCopy(VReg, Vals[0]);
1323     return true;
1324   }
1325 
1326   auto &TLI = *MF->getSubtarget().getTargetLowering();
1327   MachineMemOperand::Flags Flags = TLI.getStoreMemOperandFlags(SI, *DL);
1328 
1329   for (unsigned i = 0; i < Vals.size(); ++i) {
1330     Register Addr;
1331     MIRBuilder.materializePtrAdd(Addr, Base, OffsetTy, Offsets[i] / 8);
1332 
1333     MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
1334     Align BaseAlign = getMemOpAlign(SI);
1335     AAMDNodes AAMetadata;
1336     SI.getAAMetadata(AAMetadata);
1337     auto MMO = MF->getMachineMemOperand(
1338         Ptr, Flags, MRI->getType(Vals[i]).getSizeInBytes(),
1339         commonAlignment(BaseAlign, Offsets[i] / 8), AAMetadata, nullptr,
1340         SI.getSyncScopeID(), SI.getOrdering());
1341     MIRBuilder.buildStore(Vals[i], Addr, *MMO);
1342   }
1343   return true;
1344 }
1345 
1346 static uint64_t getOffsetFromIndices(const User &U, const DataLayout &DL) {
1347   const Value *Src = U.getOperand(0);
1348   Type *Int32Ty = Type::getInt32Ty(U.getContext());
1349 
1350   // getIndexedOffsetInType is designed for GEPs, so the first index is the
1351   // usual array element rather than looking into the actual aggregate.
1352   SmallVector<Value *, 1> Indices;
1353   Indices.push_back(ConstantInt::get(Int32Ty, 0));
1354 
1355   if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
1356     for (auto Idx : EVI->indices())
1357       Indices.push_back(ConstantInt::get(Int32Ty, Idx));
1358   } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
1359     for (auto Idx : IVI->indices())
1360       Indices.push_back(ConstantInt::get(Int32Ty, Idx));
1361   } else {
1362     for (unsigned i = 1; i < U.getNumOperands(); ++i)
1363       Indices.push_back(U.getOperand(i));
1364   }
1365 
1366   return 8 * static_cast<uint64_t>(
1367                  DL.getIndexedOffsetInType(Src->getType(), Indices));
1368 }
1369 
1370 bool IRTranslator::translateExtractValue(const User &U,
1371                                          MachineIRBuilder &MIRBuilder) {
1372   const Value *Src = U.getOperand(0);
1373   uint64_t Offset = getOffsetFromIndices(U, *DL);
1374   ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
1375   ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
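  // The source aggregate is split into one vreg per leaf member; find the
  // first split vreg whose bit offset matches the extracted member.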
1376   unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
1377   auto &DstRegs = allocateVRegs(U);
1378 
1379   for (unsigned i = 0; i < DstRegs.size(); ++i)
1380     DstRegs[i] = SrcRegs[Idx++];
1381 
1382   return true;
1383 }
1384 
1385 bool IRTranslator::translateInsertValue(const User &U,
1386                                         MachineIRBuilder &MIRBuilder) {
1387   const Value *Src = U.getOperand(0);
1388   uint64_t Offset = getOffsetFromIndices(U, *DL);
1389   auto &DstRegs = allocateVRegs(U);
1390   ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
1391   ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
1392   ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
1393   auto InsertedIt = InsertedRegs.begin();
1394 
1395   for (unsigned i = 0; i < DstRegs.size(); ++i) {
1396     if (DstOffsets[i] >= Offset && InsertedIt != InsertedRegs.end())
1397       DstRegs[i] = *InsertedIt++;
1398     else
1399       DstRegs[i] = SrcRegs[i];
1400   }
1401 
1402   return true;
1403 }
1404 
1405 bool IRTranslator::translateSelect(const User &U,
1406                                    MachineIRBuilder &MIRBuilder) {
1407   Register Tst = getOrCreateVReg(*U.getOperand(0));
1408   ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
1409   ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
1410   ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
1411 
1412   uint16_t Flags = 0;
1413   if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
1414     Flags = MachineInstr::copyFlagsFromInstruction(*SI);
1415 
1416   for (unsigned i = 0; i < ResRegs.size(); ++i) {
1417     MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i], Flags);
1418   }
1419 
1420   return true;
1421 }
1422 
1423 bool IRTranslator::translateCopy(const User &U, const Value &V,
1424                                  MachineIRBuilder &MIRBuilder) {
1425   Register Src = getOrCreateVReg(V);
1426   auto &Regs = *VMap.getVRegs(U);
1427   if (Regs.empty()) {
1428     Regs.push_back(Src);
1429     VMap.getOffsets(U)->push_back(0);
1430   } else {
1431     // If we already assigned a vreg for this instruction, we can't change that.
1432     // Emit a copy to satisfy the users we already emitted.
1433     MIRBuilder.buildCopy(Regs[0], Src);
1434   }
1435   return true;
1436 }
1437 
1438 bool IRTranslator::translateBitCast(const User &U,
1439                                     MachineIRBuilder &MIRBuilder) {
1440   // If we're bitcasting to the source type, we can reuse the source vreg.
1441   if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
1442       getLLTForType(*U.getType(), *DL))
1443     return translateCopy(U, *U.getOperand(0), MIRBuilder);
1444 
1445   return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
1446 }
1447 
1448 bool IRTranslator::translateCast(unsigned Opcode, const User &U,
1449                                  MachineIRBuilder &MIRBuilder) {
1450   Register Op = getOrCreateVReg(*U.getOperand(0));
1451   Register Res = getOrCreateVReg(U);
1452   MIRBuilder.buildInstr(Opcode, {Res}, {Op});
1453   return true;
1454 }
1455 
1456 bool IRTranslator::translateGetElementPtr(const User &U,
1457                                           MachineIRBuilder &MIRBuilder) {
1458   Value &Op0 = *U.getOperand(0);
1459   Register BaseReg = getOrCreateVReg(Op0);
1460   Type *PtrIRTy = Op0.getType();
1461   LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
1462   Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
1463   LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
1464 
1465   // Normalize a vector GEP: if the result is a vector, every scalar operand
1466   // must be splatted into a vector of matching width.
1467   unsigned VectorWidth = 0;
1468   if (auto *VT = dyn_cast<VectorType>(U.getType()))
1469     VectorWidth = cast<FixedVectorType>(VT)->getNumElements();
1470 
1471   // We might need to splat the base pointer into a vector if the offsets
1472   // are vectors.
1473   if (VectorWidth && !PtrTy.isVector()) {
1474     BaseReg =
1475         MIRBuilder.buildSplatVector(LLT::vector(VectorWidth, PtrTy), BaseReg)
1476             .getReg(0);
1477     PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
1478     PtrTy = getLLTForType(*PtrIRTy, *DL);
1479     OffsetIRTy = DL->getIntPtrType(PtrIRTy);
1480     OffsetTy = getLLTForType(*OffsetIRTy, *DL);
1481   }
1482 
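  // Walk the indices, folding constant offsets into an accumulator and
  // emitting G_PTR_ADD (plus G_MUL for scaled indices) whenever a dynamic
  // index forces the pending offset to be materialized.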
1483   int64_t Offset = 0;
1484   for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
1485        GTI != E; ++GTI) {
1486     const Value *Idx = GTI.getOperand();
1487     if (StructType *StTy = GTI.getStructTypeOrNull()) {
1488       unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
1489       Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
1490       continue;
1491     } else {
1492       uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
1493 
1494       // If this index is a constant integer, fold it straight into the
1495       // accumulated offset.
1496       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
1497         Offset += ElementSize * CI->getSExtValue();
1498         continue;
1499       }
1500 
1501       if (Offset != 0) {
1502         auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
1503         BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, OffsetMIB.getReg(0))
1504                       .getReg(0);
1505         Offset = 0;
1506       }
1507 
1508       Register IdxReg = getOrCreateVReg(*Idx);
1509       LLT IdxTy = MRI->getType(IdxReg);
1510       if (IdxTy != OffsetTy) {
1511         if (!IdxTy.isVector() && VectorWidth) {
1512           IdxReg = MIRBuilder.buildSplatVector(
1513             OffsetTy.changeElementType(IdxTy), IdxReg).getReg(0);
1514         }
1515 
1516         IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
1517       }
1518 
1519       // N = N + Idx * ElementSize;
1520       // Avoid doing it for ElementSize of 1.
1521       Register GepOffsetReg;
1522       if (ElementSize != 1) {
1523         auto ElementSizeMIB = MIRBuilder.buildConstant(
1524             getLLTForType(*OffsetIRTy, *DL), ElementSize);
1525         GepOffsetReg =
1526             MIRBuilder.buildMul(OffsetTy, IdxReg, ElementSizeMIB).getReg(0);
1527       } else
1528         GepOffsetReg = IdxReg;
1529 
1530       BaseReg = MIRBuilder.buildPtrAdd(PtrTy, BaseReg, GepOffsetReg).getReg(0);
1531     }
1532   }
1533 
1534   if (Offset != 0) {
1535     auto OffsetMIB =
1536         MIRBuilder.buildConstant(OffsetTy, Offset);
1537     MIRBuilder.buildPtrAdd(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
1538     return true;
1539   }
1540 
1541   MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
1542   return true;
1543 }
1544 
1545 bool IRTranslator::translateMemFunc(const CallInst &CI,
1546                                     MachineIRBuilder &MIRBuilder,
1547                                     unsigned Opcode) {
1548 
1549   // If the source is undef, then just emit a nop.
1550   if (isa<UndefValue>(CI.getArgOperand(1)))
1551     return true;
1552 
1553   SmallVector<Register, 3> SrcRegs;
1554 
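  // Gather the pointer and length operands (every argument except the
  // trailing 'isvolatile' flag), tracking the narrowest pointer width seen.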
1555   unsigned MinPtrSize = UINT_MAX;
1556   for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) {
1557     Register SrcReg = getOrCreateVReg(**AI);
1558     LLT SrcTy = MRI->getType(SrcReg);
1559     if (SrcTy.isPointer())
1560       MinPtrSize = std::min(SrcTy.getSizeInBits(), MinPtrSize);
1561     SrcRegs.push_back(SrcReg);
1562   }
1563 
1564   LLT SizeTy = LLT::scalar(MinPtrSize);
1565 
1566   // The size operand should have the width of the narrowest pointer operand.
1567   Register &SizeOpReg = SrcRegs[SrcRegs.size() - 1];
1568   if (MRI->getType(SizeOpReg) != SizeTy)
1569     SizeOpReg = MIRBuilder.buildZExtOrTrunc(SizeTy, SizeOpReg).getReg(0);
1570 
1571   auto ICall = MIRBuilder.buildInstr(Opcode);
1572   for (Register SrcReg : SrcRegs)
1573     ICall.addUse(SrcReg);
1574 
1575   Align DstAlign;
1576   Align SrcAlign;
1577   unsigned IsVol =
1578       cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
1579           ->getZExtValue();
1580 
1581   if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
1582     DstAlign = MCI->getDestAlign().valueOrOne();
1583     SrcAlign = MCI->getSourceAlign().valueOrOne();
1584   } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
1585     DstAlign = MMI->getDestAlign().valueOrOne();
1586     SrcAlign = MMI->getSourceAlign().valueOrOne();
1587   } else {
1588     auto *MSI = cast<MemSetInst>(&CI);
1589     DstAlign = MSI->getDestAlign().valueOrOne();
1590   }
1591 
1592   // We need to propagate the tail call flag from the IR inst as an argument.
1593   // Otherwise, we have to pessimize and assume later that we cannot tail call
1594   // any memory intrinsics.
1595   ICall.addImm(CI.isTailCall() ? 1 : 0);
1596 
1597   // Create mem operands to store the alignment and volatile info.
1598   auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone;
1599   ICall.addMemOperand(MF->getMachineMemOperand(
1600       MachinePointerInfo(CI.getArgOperand(0)),
1601       MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
1602   if (Opcode != TargetOpcode::G_MEMSET)
1603     ICall.addMemOperand(MF->getMachineMemOperand(
1604         MachinePointerInfo(CI.getArgOperand(1)),
1605         MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
1606 
1607   return true;
1608 }
1609 
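// Emit a LOAD_STACK_GUARD into DstReg. If the target exposes a stack guard
// global via TLI, also attach an invariant, dereferenceable memory operand
// describing the load from it.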
1610 void IRTranslator::getStackGuard(Register DstReg,
1611                                  MachineIRBuilder &MIRBuilder) {
1612   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1613   MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
1614   auto MIB =
1615       MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD, {DstReg}, {});
1616 
1617   auto &TLI = *MF->getSubtarget().getTargetLowering();
1618   Value *Global = TLI.getSDagStackGuard(*MF->getFunction().getParent());
1619   if (!Global)
1620     return;
1621 
1622   MachinePointerInfo MPInfo(Global);
1623   auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
1624                MachineMemOperand::MODereferenceable;
1625   MachineMemOperand *MemRef =
1626       MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
1627                                DL->getPointerABIAlignment(0));
1628   MIB.setMemRefs({MemRef});
1629 }
1630 
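// Lower an llvm.*.with.overflow intrinsic to the corresponding two-result
// generic opcode: the first result is the value, the second the overflow bit.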
1631 bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
1632                                               MachineIRBuilder &MIRBuilder) {
1633   ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
1634   MIRBuilder.buildInstr(
1635       Op, {ResRegs[0], ResRegs[1]},
1636       {getOrCreateVReg(*CI.getOperand(0)), getOrCreateVReg(*CI.getOperand(1))});
1637 
1638   return true;
1639 }
1640 
1641 bool IRTranslator::translateFixedPointIntrinsic(unsigned Op, const CallInst &CI,
1642                                                 MachineIRBuilder &MIRBuilder) {
1643   Register Dst = getOrCreateVReg(CI);
1644   Register Src0 = getOrCreateVReg(*CI.getOperand(0));
1645   Register Src1 = getOrCreateVReg(*CI.getOperand(1));
1646   uint64_t Scale = cast<ConstantInt>(CI.getOperand(2))->getZExtValue();
1647   MIRBuilder.buildInstr(Op, {Dst}, { Src0, Src1, Scale });
1648   return true;
1649 }
1650 
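// Map a "simple" intrinsic (one that only needs a vreg def plus a vreg use
// per argument) to its generic opcode, or return Intrinsic::not_intrinsic if
// it needs custom handling.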
1651 unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
1652   switch (ID) {
1653     default:
1654       break;
1655     case Intrinsic::bswap:
1656       return TargetOpcode::G_BSWAP;
1657     case Intrinsic::bitreverse:
1658       return TargetOpcode::G_BITREVERSE;
1659     case Intrinsic::fshl:
1660       return TargetOpcode::G_FSHL;
1661     case Intrinsic::fshr:
1662       return TargetOpcode::G_FSHR;
1663     case Intrinsic::ceil:
1664       return TargetOpcode::G_FCEIL;
1665     case Intrinsic::cos:
1666       return TargetOpcode::G_FCOS;
1667     case Intrinsic::ctpop:
1668       return TargetOpcode::G_CTPOP;
1669     case Intrinsic::exp:
1670       return TargetOpcode::G_FEXP;
1671     case Intrinsic::exp2:
1672       return TargetOpcode::G_FEXP2;
1673     case Intrinsic::fabs:
1674       return TargetOpcode::G_FABS;
1675     case Intrinsic::copysign:
1676       return TargetOpcode::G_FCOPYSIGN;
1677     case Intrinsic::minnum:
1678       return TargetOpcode::G_FMINNUM;
1679     case Intrinsic::maxnum:
1680       return TargetOpcode::G_FMAXNUM;
1681     case Intrinsic::minimum:
1682       return TargetOpcode::G_FMINIMUM;
1683     case Intrinsic::maximum:
1684       return TargetOpcode::G_FMAXIMUM;
1685     case Intrinsic::canonicalize:
1686       return TargetOpcode::G_FCANONICALIZE;
1687     case Intrinsic::floor:
1688       return TargetOpcode::G_FFLOOR;
1689     case Intrinsic::fma:
1690       return TargetOpcode::G_FMA;
1691     case Intrinsic::log:
1692       return TargetOpcode::G_FLOG;
1693     case Intrinsic::log2:
1694       return TargetOpcode::G_FLOG2;
1695     case Intrinsic::log10:
1696       return TargetOpcode::G_FLOG10;
1697     case Intrinsic::nearbyint:
1698       return TargetOpcode::G_FNEARBYINT;
1699     case Intrinsic::pow:
1700       return TargetOpcode::G_FPOW;
1701     case Intrinsic::powi:
1702       return TargetOpcode::G_FPOWI;
1703     case Intrinsic::rint:
1704       return TargetOpcode::G_FRINT;
1705     case Intrinsic::round:
1706       return TargetOpcode::G_INTRINSIC_ROUND;
1707     case Intrinsic::roundeven:
1708       return TargetOpcode::G_INTRINSIC_ROUNDEVEN;
1709     case Intrinsic::sin:
1710       return TargetOpcode::G_FSIN;
1711     case Intrinsic::sqrt:
1712       return TargetOpcode::G_FSQRT;
1713     case Intrinsic::trunc:
1714       return TargetOpcode::G_INTRINSIC_TRUNC;
1715     case Intrinsic::readcyclecounter:
1716       return TargetOpcode::G_READCYCLECOUNTER;
1717     case Intrinsic::ptrmask:
1718       return TargetOpcode::G_PTRMASK;
1719     case Intrinsic::lrint:
1720       return TargetOpcode::G_INTRINSIC_LRINT;
1721   }
1722   return Intrinsic::not_intrinsic;
1723 }
1724 
1725 bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
1726                                             Intrinsic::ID ID,
1727                                             MachineIRBuilder &MIRBuilder) {
1728 
1729   unsigned Op = getSimpleIntrinsicOpcode(ID);
1730 
1731   // Is this a simple intrinsic?
1732   if (Op == Intrinsic::not_intrinsic)
1733     return false;
1734 
1735   // Yes. Let's translate it.
1736   SmallVector<llvm::SrcOp, 4> VRegs;
1737   for (auto &Arg : CI.arg_operands())
1738     VRegs.push_back(getOrCreateVReg(*Arg));
1739 
1740   MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
1741                         MachineInstr::copyFlagsFromInstruction(CI));
1742   return true;
1743 }
1744 
1745 // TODO: Include ConstrainedOps.def when all strict instructions are defined.
1746 static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
1747   switch (ID) {
1748   case Intrinsic::experimental_constrained_fadd:
1749     return TargetOpcode::G_STRICT_FADD;
1750   case Intrinsic::experimental_constrained_fsub:
1751     return TargetOpcode::G_STRICT_FSUB;
1752   case Intrinsic::experimental_constrained_fmul:
1753     return TargetOpcode::G_STRICT_FMUL;
1754   case Intrinsic::experimental_constrained_fdiv:
1755     return TargetOpcode::G_STRICT_FDIV;
1756   case Intrinsic::experimental_constrained_frem:
1757     return TargetOpcode::G_STRICT_FREM;
1758   case Intrinsic::experimental_constrained_fma:
1759     return TargetOpcode::G_STRICT_FMA;
1760   case Intrinsic::experimental_constrained_sqrt:
1761     return TargetOpcode::G_STRICT_FSQRT;
1762   default:
1763     return 0;
1764   }
1765 }
1766 
1767 bool IRTranslator::translateConstrainedFPIntrinsic(
1768   const ConstrainedFPIntrinsic &FPI, MachineIRBuilder &MIRBuilder) {
1769   fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue();
1770 
1771   unsigned Opcode = getConstrainedOpcode(FPI.getIntrinsicID());
1772   if (!Opcode)
1773     return false;
1774 
1775   unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
1776   if (EB == fp::ExceptionBehavior::ebIgnore)
1777     Flags |= MachineInstr::NoFPExcept;
1778 
1779   SmallVector<llvm::SrcOp, 4> VRegs;
1780   VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(0)));
1781   if (!FPI.isUnaryOp())
1782     VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(1)));
1783   if (FPI.isTernaryOp())
1784     VRegs.push_back(getOrCreateVReg(*FPI.getArgOperand(2)));
1785 
1786   MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(FPI)}, VRegs, Flags);
1787   return true;
1788 }
1789 
1790 bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
1791                                            MachineIRBuilder &MIRBuilder) {
1792 
1793   // If this is a simple intrinsic (that is, we just need to add a def of a
1794   // vreg and uses for each arg operand), then translate it.
1795   if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
1796     return true;
1797 
1798   switch (ID) {
1799   default:
1800     break;
1801   case Intrinsic::lifetime_start:
1802   case Intrinsic::lifetime_end: {
1803     // No stack colouring in O0, discard region information.
1804     if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
1805       return true;
1806 
1807     unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
1808                                                   : TargetOpcode::LIFETIME_END;
1809 
1810     // Get the underlying objects for the location passed to the lifetime
1811     // marker.
1812     SmallVector<const Value *, 4> Allocas;
1813     getUnderlyingObjects(CI.getArgOperand(1), Allocas);
1814 
1815     // Iterate over each underlying object, creating lifetime markers for each
1816     // static alloca. Quit if we find a non-static alloca.
1817     for (const Value *V : Allocas) {
1818       const AllocaInst *AI = dyn_cast<AllocaInst>(V);
1819       if (!AI)
1820         continue;
1821 
1822       if (!AI->isStaticAlloca())
1823         return true;
1824 
1825       MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
1826     }
1827     return true;
1828   }
1829   case Intrinsic::dbg_declare: {
1830     const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
1831     assert(DI.getVariable() && "Missing variable");
1832 
1833     const Value *Address = DI.getAddress();
1834     if (!Address || isa<UndefValue>(Address)) {
1835       LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
1836       return true;
1837     }
1838 
1839     assert(DI.getVariable()->isValidLocationForIntrinsic(
1840                MIRBuilder.getDebugLoc()) &&
1841            "Expected inlined-at fields to agree");
1842     auto AI = dyn_cast<AllocaInst>(Address);
1843     if (AI && AI->isStaticAlloca()) {
1844       // Static allocas are tracked at the MF level, no need for DBG_VALUE
1845       // instructions (in fact, they get ignored if they *do* exist).
1846       MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
1847                              getOrCreateFrameIndex(*AI), DI.getDebugLoc());
1848     } else {
1849       // A dbg.declare describes the address of a source variable, so lower it
1850       // into an indirect DBG_VALUE.
1851       MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
1852                                        DI.getVariable(), DI.getExpression());
1853     }
1854     return true;
1855   }
1856   case Intrinsic::dbg_label: {
1857     const DbgLabelInst &DI = cast<DbgLabelInst>(CI);
1858     assert(DI.getLabel() && "Missing label");
1859 
1860     assert(DI.getLabel()->isValidLocationForIntrinsic(
1861                MIRBuilder.getDebugLoc()) &&
1862            "Expected inlined-at fields to agree");
1863 
1864     MIRBuilder.buildDbgLabel(DI.getLabel());
1865     return true;
1866   }
1867   case Intrinsic::vaend:
1868     // No target I know of cares about va_end. Certainly no in-tree target
1869     // does. Simplest intrinsic ever!
1870     return true;
1871   case Intrinsic::vastart: {
1872     auto &TLI = *MF->getSubtarget().getTargetLowering();
1873     Value *Ptr = CI.getArgOperand(0);
1874     unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
1875 
1876     // FIXME: Get alignment
1877     MIRBuilder.buildInstr(TargetOpcode::G_VASTART, {}, {getOrCreateVReg(*Ptr)})
1878         .addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Ptr),
1879                                                 MachineMemOperand::MOStore,
1880                                                 ListSize, Align(1)));
1881     return true;
1882   }
1883   case Intrinsic::dbg_value: {
1884     // This form of DBG_VALUE is target-independent.
1885     const DbgValueInst &DI = cast<DbgValueInst>(CI);
1886     const Value *V = DI.getValue();
1887     assert(DI.getVariable()->isValidLocationForIntrinsic(
1888                MIRBuilder.getDebugLoc()) &&
1889            "Expected inlined-at fields to agree");
1890     if (!V) {
1891       // Currently the optimizer can produce this; insert an undef to
1892       // help debugging.  Probably the optimizer should not do this.
1893       MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
1894     } else if (const auto *CI = dyn_cast<Constant>(V)) {
1895       MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
1896     } else {
1897       for (Register Reg : getOrCreateVRegs(*V)) {
1898         // FIXME: This does not handle register-indirect values at offset 0. The
1899         // direct/indirect thing shouldn't really be handled by something as
1900         // implicit as reg+noreg vs reg+imm in the first place, but it seems
1901         // pretty baked in right now.
1902         MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
1903       }
1904     }
1905     return true;
1906   }
1907   case Intrinsic::uadd_with_overflow:
1908     return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDO, MIRBuilder);
1909   case Intrinsic::sadd_with_overflow:
1910     return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
1911   case Intrinsic::usub_with_overflow:
1912     return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBO, MIRBuilder);
1913   case Intrinsic::ssub_with_overflow:
1914     return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
1915   case Intrinsic::umul_with_overflow:
1916     return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
1917   case Intrinsic::smul_with_overflow:
1918     return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
1919   case Intrinsic::uadd_sat:
1920     return translateBinaryOp(TargetOpcode::G_UADDSAT, CI, MIRBuilder);
1921   case Intrinsic::sadd_sat:
1922     return translateBinaryOp(TargetOpcode::G_SADDSAT, CI, MIRBuilder);
1923   case Intrinsic::usub_sat:
1924     return translateBinaryOp(TargetOpcode::G_USUBSAT, CI, MIRBuilder);
1925   case Intrinsic::ssub_sat:
1926     return translateBinaryOp(TargetOpcode::G_SSUBSAT, CI, MIRBuilder);
1927   case Intrinsic::ushl_sat:
1928     return translateBinaryOp(TargetOpcode::G_USHLSAT, CI, MIRBuilder);
1929   case Intrinsic::sshl_sat:
1930     return translateBinaryOp(TargetOpcode::G_SSHLSAT, CI, MIRBuilder);
1931   case Intrinsic::umin:
1932     return translateBinaryOp(TargetOpcode::G_UMIN, CI, MIRBuilder);
1933   case Intrinsic::umax:
1934     return translateBinaryOp(TargetOpcode::G_UMAX, CI, MIRBuilder);
1935   case Intrinsic::smin:
1936     return translateBinaryOp(TargetOpcode::G_SMIN, CI, MIRBuilder);
1937   case Intrinsic::smax:
1938     return translateBinaryOp(TargetOpcode::G_SMAX, CI, MIRBuilder);
1939   case Intrinsic::abs:
1940     // TODO: Preserve "int min is poison" arg in GMIR?
1941     return translateUnaryOp(TargetOpcode::G_ABS, CI, MIRBuilder);
1942   case Intrinsic::smul_fix:
1943     return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIX, CI, MIRBuilder);
1944   case Intrinsic::umul_fix:
1945     return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIX, CI, MIRBuilder);
1946   case Intrinsic::smul_fix_sat:
1947     return translateFixedPointIntrinsic(TargetOpcode::G_SMULFIXSAT, CI, MIRBuilder);
1948   case Intrinsic::umul_fix_sat:
1949     return translateFixedPointIntrinsic(TargetOpcode::G_UMULFIXSAT, CI, MIRBuilder);
1950   case Intrinsic::sdiv_fix:
1951     return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIX, CI, MIRBuilder);
1952   case Intrinsic::udiv_fix:
1953     return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIX, CI, MIRBuilder);
1954   case Intrinsic::sdiv_fix_sat:
1955     return translateFixedPointIntrinsic(TargetOpcode::G_SDIVFIXSAT, CI, MIRBuilder);
1956   case Intrinsic::udiv_fix_sat:
1957     return translateFixedPointIntrinsic(TargetOpcode::G_UDIVFIXSAT, CI, MIRBuilder);
1958   case Intrinsic::fmuladd: {
1959     const TargetMachine &TM = MF->getTarget();
1960     const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
1961     Register Dst = getOrCreateVReg(CI);
1962     Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
1963     Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
1964     Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
1965     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
1966         TLI.isFMAFasterThanFMulAndFAdd(*MF,
1967                                        TLI.getValueType(*DL, CI.getType()))) {
1968       // TODO: Revisit this to see if we should move this part of the
1969       // lowering to the combiner.
1970       MIRBuilder.buildFMA(Dst, Op0, Op1, Op2,
1971                           MachineInstr::copyFlagsFromInstruction(CI));
1972     } else {
1973       LLT Ty = getLLTForType(*CI.getType(), *DL);
1974       auto FMul = MIRBuilder.buildFMul(
1975           Ty, Op0, Op1, MachineInstr::copyFlagsFromInstruction(CI));
1976       MIRBuilder.buildFAdd(Dst, FMul, Op2,
1977                            MachineInstr::copyFlagsFromInstruction(CI));
1978     }
1979     return true;
1980   }
1981   case Intrinsic::convert_from_fp16:
1982     // FIXME: This intrinsic should probably be removed from the IR.
1983     MIRBuilder.buildFPExt(getOrCreateVReg(CI),
1984                           getOrCreateVReg(*CI.getArgOperand(0)),
1985                           MachineInstr::copyFlagsFromInstruction(CI));
1986     return true;
1987   case Intrinsic::convert_to_fp16:
1988     // FIXME: This intrinsic should probably be removed from the IR.
1989     MIRBuilder.buildFPTrunc(getOrCreateVReg(CI),
1990                             getOrCreateVReg(*CI.getArgOperand(0)),
1991                             MachineInstr::copyFlagsFromInstruction(CI));
1992     return true;
1993   case Intrinsic::memcpy:
1994     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY);
1995   case Intrinsic::memmove:
1996     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE);
1997   case Intrinsic::memset:
1998     return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMSET);
1999   case Intrinsic::eh_typeid_for: {
2000     GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
2001     Register Reg = getOrCreateVReg(CI);
2002     unsigned TypeID = MF->getTypeIDFor(GV);
2003     MIRBuilder.buildConstant(Reg, TypeID);
2004     return true;
2005   }
2006   case Intrinsic::objectsize:
2007     llvm_unreachable("llvm.objectsize.* should have been lowered already");
2008 
2009   case Intrinsic::is_constant:
2010     llvm_unreachable("llvm.is.constant.* should have been lowered already");
2011 
2012   case Intrinsic::stackguard:
2013     getStackGuard(getOrCreateVReg(CI), MIRBuilder);
2014     return true;
2015   case Intrinsic::stackprotector: {
2016     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
2017     Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
2018     getStackGuard(GuardVal, MIRBuilder);
2019 
2020     AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
2021     int FI = getOrCreateFrameIndex(*Slot);
2022     MF->getFrameInfo().setStackProtectorIndex(FI);
2023 
2024     MIRBuilder.buildStore(
2025         GuardVal, getOrCreateVReg(*Slot),
2026         *MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(*MF, FI),
2027                                   MachineMemOperand::MOStore |
2028                                       MachineMemOperand::MOVolatile,
2029                                   PtrTy.getSizeInBits() / 8, Align(8)));
2030     return true;
2031   }
2032   case Intrinsic::stacksave: {
2033     // Save the stack pointer to the location provided by the intrinsic.
2034     Register Reg = getOrCreateVReg(CI);
2035     Register StackPtr = MF->getSubtarget()
2036                             .getTargetLowering()
2037                             ->getStackPointerRegisterToSaveRestore();
2038 
2039     // If the target doesn't specify a stack pointer, then fall back.
2040     if (!StackPtr)
2041       return false;
2042 
2043     MIRBuilder.buildCopy(Reg, StackPtr);
2044     return true;
2045   }
2046   case Intrinsic::stackrestore: {
2047     // Restore the stack pointer from the location provided by the intrinsic.
2048     Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
2049     Register StackPtr = MF->getSubtarget()
2050                             .getTargetLowering()
2051                             ->getStackPointerRegisterToSaveRestore();
2052 
2053     // If the target doesn't specify a stack pointer, then fall back.
2054     if (!StackPtr)
2055       return false;
2056 
2057     MIRBuilder.buildCopy(StackPtr, Reg);
2058     return true;
2059   }
2060   case Intrinsic::cttz:
2061   case Intrinsic::ctlz: {
2062     ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
2063     bool isTrailing = ID == Intrinsic::cttz;
2064     unsigned Opcode = isTrailing
2065                           ? Cst->isZero() ? TargetOpcode::G_CTTZ
2066                                           : TargetOpcode::G_CTTZ_ZERO_UNDEF
2067                           : Cst->isZero() ? TargetOpcode::G_CTLZ
2068                                           : TargetOpcode::G_CTLZ_ZERO_UNDEF;
2069     MIRBuilder.buildInstr(Opcode, {getOrCreateVReg(CI)},
2070                           {getOrCreateVReg(*CI.getArgOperand(0))});
2071     return true;
2072   }
2073   case Intrinsic::invariant_start: {
2074     LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
2075     Register Undef = MRI->createGenericVirtualRegister(PtrTy);
2076     MIRBuilder.buildUndef(Undef);
2077     return true;
2078   }
2079   case Intrinsic::invariant_end:
2080     return true;
2081   case Intrinsic::expect:
2082   case Intrinsic::annotation:
2083   case Intrinsic::ptr_annotation:
2084   case Intrinsic::launder_invariant_group:
2085   case Intrinsic::strip_invariant_group: {
2086     // Drop the intrinsic, but forward the value.
2087     MIRBuilder.buildCopy(getOrCreateVReg(CI),
2088                          getOrCreateVReg(*CI.getArgOperand(0)));
2089     return true;
2090   }
2091   case Intrinsic::assume:
2092   case Intrinsic::var_annotation:
2093   case Intrinsic::sideeffect:
2094     // Discard annotate attributes, assumptions, and artificial side-effects.
2095     return true;
2096   case Intrinsic::read_volatile_register:
2097   case Intrinsic::read_register: {
2098     Value *Arg = CI.getArgOperand(0);
2099     MIRBuilder
2100         .buildInstr(TargetOpcode::G_READ_REGISTER, {getOrCreateVReg(CI)}, {})
2101         .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()));
2102     return true;
2103   }
2104   case Intrinsic::write_register: {
2105     Value *Arg = CI.getArgOperand(0);
2106     MIRBuilder.buildInstr(TargetOpcode::G_WRITE_REGISTER)
2107       .addMetadata(cast<MDNode>(cast<MetadataAsValue>(Arg)->getMetadata()))
2108       .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
2109     return true;
2110   }
2111   case Intrinsic::localescape: {
2112     MachineBasicBlock &EntryMBB = MF->front();
2113     StringRef EscapedName = GlobalValue::dropLLVMManglingEscape(MF->getName());
2114 
2115     // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission
2116     // is the same on all targets.
2117     for (unsigned Idx = 0, E = CI.getNumArgOperands(); Idx < E; ++Idx) {
2118       Value *Arg = CI.getArgOperand(Idx)->stripPointerCasts();
2119       if (isa<ConstantPointerNull>(Arg))
2120         continue; // Skip null pointers. They represent a hole in index space.
2121 
2122       int FI = getOrCreateFrameIndex(*cast<AllocaInst>(Arg));
2123       MCSymbol *FrameAllocSym =
2124           MF->getMMI().getContext().getOrCreateFrameAllocSymbol(EscapedName,
2125                                                                 Idx);
2126 
2127       // This should be inserted at the start of the entry block.
2128       auto LocalEscape =
2129           MIRBuilder.buildInstrNoInsert(TargetOpcode::LOCAL_ESCAPE)
2130               .addSym(FrameAllocSym)
2131               .addFrameIndex(FI);
2132 
2133       EntryMBB.insert(EntryMBB.begin(), LocalEscape);
2134     }
2135 
2136     return true;
2137   }
2138 #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC)  \
2139   case Intrinsic::INTRINSIC:
2140 #include "llvm/IR/ConstrainedOps.def"
2141     return translateConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(CI),
2142                                            MIRBuilder);
2143 
2144   }
2145   return false;
2146 }
2147 
2148 bool IRTranslator::translateInlineAsm(const CallBase &CB,
2149                                       MachineIRBuilder &MIRBuilder) {
2150 
2151   const InlineAsmLowering *ALI = MF->getSubtarget().getInlineAsmLowering();
2152 
2153   if (!ALI) {
2154     LLVM_DEBUG(
2155         dbgs() << "Inline asm lowering is not supported for this target yet\n");
2156     return false;
2157   }
2158 
2159   return ALI->lowerInlineAsm(
2160       MIRBuilder, CB, [&](const Value &Val) { return getOrCreateVRegs(Val); });
2161 }
2162 
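// Common lowering path for calls and invokes: gather argument vregs (routing
// swifterror arguments through their dedicated vregs) and defer to
// CallLowering.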
2163 bool IRTranslator::translateCallBase(const CallBase &CB,
2164                                      MachineIRBuilder &MIRBuilder) {
2165   ArrayRef<Register> Res = getOrCreateVRegs(CB);
2166 
2167   SmallVector<ArrayRef<Register>, 8> Args;
2168   Register SwiftInVReg = 0;
2169   Register SwiftErrorVReg = 0;
2170   for (auto &Arg : CB.args()) {
2171     if (CLI->supportSwiftError() && isSwiftError(Arg)) {
2172       assert(SwiftInVReg == 0 && "Expected only one swift error argument");
2173       LLT Ty = getLLTForType(*Arg->getType(), *DL);
2174       SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
2175       MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
2176                                             &CB, &MIRBuilder.getMBB(), Arg));
2177       Args.emplace_back(makeArrayRef(SwiftInVReg));
2178       SwiftErrorVReg =
2179           SwiftError.getOrCreateVRegDefAt(&CB, &MIRBuilder.getMBB(), Arg);
2180       continue;
2181     }
2182     Args.push_back(getOrCreateVRegs(*Arg));
2183   }
2184 
2185   // We don't set HasCalls on MFI here yet because call lowering may decide to
2186   // optimize into tail calls. Instead, we defer that to selection where a final
2187   // scan is done to check if any instructions are calls.
2188   bool Success =
2189       CLI->lowerCall(MIRBuilder, CB, Res, Args, SwiftErrorVReg,
2190                      [&]() { return getOrCreateVReg(*CB.getCalledOperand()); });
2191 
2192   // Check if we just inserted a tail call.
2193   if (Success) {
2194     assert(!HasTailCall && "Can't tail call return twice from block?");
2195     const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2196     HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
2197   }
2198 
2199   return Success;
2200 }
2201 
2202 bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
2203   const CallInst &CI = cast<CallInst>(U);
2204   auto TII = MF->getTarget().getIntrinsicInfo();
2205   const Function *F = CI.getCalledFunction();
2206 
2207   // FIXME: support Windows dllimport function calls.
2208   if (F && (F->hasDLLImportStorageClass() ||
2209             (MF->getTarget().getTargetTriple().isOSWindows() &&
2210              F->hasExternalWeakLinkage())))
2211     return false;
2212 
2213   // FIXME: support control flow guard targets.
2214   if (CI.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
2215     return false;
2216 
2217   if (CI.isInlineAsm())
2218     return translateInlineAsm(CI, MIRBuilder);
2219 
2220   Intrinsic::ID ID = Intrinsic::not_intrinsic;
2221   if (F && F->isIntrinsic()) {
2222     ID = F->getIntrinsicID();
2223     if (TII && ID == Intrinsic::not_intrinsic)
2224       ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
2225   }
2226 
2227   if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
2228     return translateCallBase(CI, MIRBuilder);
2229 
2230   assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
2231 
2232   if (translateKnownIntrinsic(CI, ID, MIRBuilder))
2233     return true;
2234 
2235   ArrayRef<Register> ResultRegs;
2236   if (!CI.getType()->isVoidTy())
2237     ResultRegs = getOrCreateVRegs(CI);
2238 
2239   // Ignore the callsite attributes. Backend code is most likely not expecting
2240   // an intrinsic to sometimes have side effects and sometimes not.
2241   MachineInstrBuilder MIB =
2242       MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
2243   if (isa<FPMathOperator>(CI))
2244     MIB->copyIRFlags(CI);
2245 
2246   for (auto &Arg : enumerate(CI.arg_operands())) {
2247     // If this is required to be an immediate, don't materialize it in a
2248     // register.
2249     if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
2250       if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
2251         // imm arguments are more convenient than cimm (and realistically
2252         // probably sufficient), so use them.
2253         assert(CI->getBitWidth() <= 64 &&
2254                "large intrinsic immediates not handled");
2255         MIB.addImm(CI->getSExtValue());
2256       } else {
2257         MIB.addFPImm(cast<ConstantFP>(Arg.value()));
2258       }
2259     } else if (auto MD = dyn_cast<MetadataAsValue>(Arg.value())) {
2260       auto *MDN = dyn_cast<MDNode>(MD->getMetadata());
2261       if (!MDN) // This was probably an MDString.
2262         return false;
2263       MIB.addMetadata(MDN);
2264     } else {
2265       ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
2266       if (VRegs.size() > 1)
2267         return false;
2268       MIB.addUse(VRegs[0]);
2269     }
2270   }
2271 
2272   // Add a MachineMemOperand if it is a target mem intrinsic.
2273   const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
2274   TargetLowering::IntrinsicInfo Info;
2275   // TODO: Add a GlobalISel version of getTgtMemIntrinsic.
2276   if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
2277     Align Alignment = Info.align.getValueOr(
2278         DL->getABITypeAlign(Info.memVT.getTypeForEVT(F->getContext())));
2279 
2280     uint64_t Size = Info.memVT.getStoreSize();
2281     MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
2282                                                Info.flags, Size, Alignment));
2283   }
2284 
2285   return true;
2286 }
2287 
2288 bool IRTranslator::translateInvoke(const User &U,
2289                                    MachineIRBuilder &MIRBuilder) {
2290   const InvokeInst &I = cast<InvokeInst>(U);
2291   MCContext &Context = MF->getContext();
2292 
2293   const BasicBlock *ReturnBB = I.getSuccessor(0);
2294   const BasicBlock *EHPadBB = I.getSuccessor(1);
2295 
2296   const Function *Fn = I.getCalledFunction();
2297   if (I.isInlineAsm())
2298     return false;
2299 
2300   // FIXME: support invoking patchpoint and statepoint intrinsics.
2301   if (Fn && Fn->isIntrinsic())
2302     return false;
2303 
2304   // FIXME: support whatever these are.
2305   if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
2306     return false;
2307 
2308   // FIXME: support control flow guard targets.
2309   if (I.countOperandBundlesOfType(LLVMContext::OB_cfguardtarget))
2310     return false;
2311 
2312   // FIXME: support Windows exception handling.
2313   if (!isa<LandingPadInst>(EHPadBB->getFirstNonPHI()))
2314     return false;
2315 
2316   // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
2317   // the region covered by the try.
2318   MCSymbol *BeginSymbol = Context.createTempSymbol();
2319   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
2320 
2321   if (!translateCallBase(I, MIRBuilder))
2322     return false;
2323 
2324   MCSymbol *EndSymbol = Context.createTempSymbol();
2325   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
2326 
2327   // FIXME: track probabilities.
2328   MachineBasicBlock &EHPadMBB = getMBB(*EHPadBB),
2329                     &ReturnMBB = getMBB(*ReturnBB);
2330   MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
2331   MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
2332   MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
2333   MIRBuilder.buildBr(ReturnMBB);
2334 
2335   return true;
2336 }
2337 
2338 bool IRTranslator::translateCallBr(const User &U,
2339                                    MachineIRBuilder &MIRBuilder) {
2340   // FIXME: Implement this.
2341   return false;
2342 }
2343 
2344 bool IRTranslator::translateLandingPad(const User &U,
2345                                        MachineIRBuilder &MIRBuilder) {
2346   const LandingPadInst &LP = cast<LandingPadInst>(U);
2347 
2348   MachineBasicBlock &MBB = MIRBuilder.getMBB();
2349 
2350   MBB.setIsEHPad();
2351 
2352   // If there aren't registers to copy the values into (e.g., during SjLj
2353   // exceptions), then don't bother.
2354   auto &TLI = *MF->getSubtarget().getTargetLowering();
2355   const Constant *PersonalityFn = MF->getFunction().getPersonalityFn();
2356   if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2357       TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2358     return true;
2359 
2360   // If the landingpad's return type is token type, we don't emit anything
2361   // for its exception pointer and selector values. Extracting the exception
2362   // pointer or selector value from a token-typed landingpad is not currently
2363   // supported.
2364   if (LP.getType()->isTokenTy())
2365     return true;
2366 
2367   // Add a label to mark the beginning of the landing pad.  Deletion of the
2368   // landing pad can thus be detected via the MachineModuleInfo.
2369   MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
2370     .addSym(MF->addLandingPad(&MBB));
2371 
2372   // If the unwinder does not preserve all registers, ensure that the
2373   // function marks the clobbered registers as used.
2374   const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
2375   if (auto *RegMask = TRI.getCustomEHPadPreservedMask(*MF))
2376     MF->getRegInfo().addPhysRegsUsedFromRegMask(RegMask);
2377 
2378   LLT Ty = getLLTForType(*LP.getType(), *DL);
2379   Register Undef = MRI->createGenericVirtualRegister(Ty);
2380   MIRBuilder.buildUndef(Undef);
2381 
2382   SmallVector<LLT, 2> Tys;
2383   for (Type *Ty : cast<StructType>(LP.getType())->elements())
2384     Tys.push_back(getLLTForType(*Ty, *DL));
2385   assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
2386 
2387   // Mark exception register as live in.
2388   Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
2389   if (!ExceptionReg)
2390     return false;
2391 
2392   MBB.addLiveIn(ExceptionReg);
2393   ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
2394   MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
2395 
2396   Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
2397   if (!SelectorReg)
2398     return false;
2399 
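  // Copy the selector out of its physical register into a pointer-typed vreg,
  // then cast it to the landingpad's selector member.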
2400   MBB.addLiveIn(SelectorReg);
2401   Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
2402   MIRBuilder.buildCopy(PtrVReg, SelectorReg);
2403   MIRBuilder.buildCast(ResRegs[1], PtrVReg);
2404 
2405   return true;
2406 }
2407 
2408 bool IRTranslator::translateAlloca(const User &U,
2409                                    MachineIRBuilder &MIRBuilder) {
2410   auto &AI = cast<AllocaInst>(U);
2411 
2412   if (AI.isSwiftError())
2413     return true;
2414 
2415   if (AI.isStaticAlloca()) {
2416     Register Res = getOrCreateVReg(AI);
2417     int FI = getOrCreateFrameIndex(AI);
2418     MIRBuilder.buildFrameIndex(Res, FI);
2419     return true;
2420   }
2421 
2422   // FIXME: support stack probing for Windows.
2423   if (MF->getTarget().getTargetTriple().isOSWindows())
2424     return false;
2425 
2426   // Now we're in the harder dynamic case.
2427   Register NumElts = getOrCreateVReg(*AI.getArraySize());
2428   Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
2429   LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
2430   if (MRI->getType(NumElts) != IntPtrTy) {
2431     Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
2432     MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
2433     NumElts = ExtElts;
2434   }
2435 
2436   Type *Ty = AI.getAllocatedType();
2437 
2438   Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
2439   Register TySize =
2440       getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
2441   MIRBuilder.buildMul(AllocSize, NumElts, TySize);
2442 
2443   // Round the size of the allocation up to the stack alignment
2444   // by adding SA-1 to the size. This doesn't overflow because we're computing
2445   // an address inside an alloca.
2446   Align StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlign();
2447   auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign.value() - 1);
2448   auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
2449                                       MachineInstr::NoUWrap);
2450   auto AlignCst =
2451       MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign.value() - 1));
2452   auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
2453 
2454   Align Alignment = std::max(AI.getAlign(), DL->getPrefTypeAlign(Ty));
2455   if (Alignment <= StackAlign)
2456     Alignment = Align(1);
2457   MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Alignment);
2458 
2459   MF->getFrameInfo().CreateVariableSizedObject(Alignment, &AI);
2460   assert(MF->getFrameInfo().hasVarSizedObjects());
2461   return true;
2462 }
2463 
2464 bool IRTranslator::translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
2465   // FIXME: We may need more info about the type. Because of how LLT works,
2466   // we're completely discarding the i64/double distinction here (amongst
2467   // others). Fortunately the ABIs I know of where that matters don't use va_arg
2468   // anyway but that's not guaranteed.
2469   MIRBuilder.buildInstr(TargetOpcode::G_VAARG, {getOrCreateVReg(U)},
2470                         {getOrCreateVReg(*U.getOperand(0)),
2471                          DL->getABITypeAlign(U.getType()).value()});
2472   return true;
2473 }
2474 
2475 bool IRTranslator::translateInsertElement(const User &U,
2476                                           MachineIRBuilder &MIRBuilder) {
2477   // If it is a <1 x Ty> vector, use the scalar as it is
2478   // not a legal vector type in LLT.
2479   if (cast<FixedVectorType>(U.getType())->getNumElements() == 1)
2480     return translateCopy(U, *U.getOperand(1), MIRBuilder);
2481 
2482   Register Res = getOrCreateVReg(U);
2483   Register Val = getOrCreateVReg(*U.getOperand(0));
2484   Register Elt = getOrCreateVReg(*U.getOperand(1));
2485   Register Idx = getOrCreateVReg(*U.getOperand(2));
2486   MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
2487   return true;
2488 }
2489 
2490 bool IRTranslator::translateExtractElement(const User &U,
2491                                            MachineIRBuilder &MIRBuilder) {
2492   // If it is a <1 x Ty> vector, use the scalar as it is
2493   // not a legal vector type in LLT.
2494   if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
2495     return translateCopy(U, *U.getOperand(0), MIRBuilder);
2496 
2497   Register Res = getOrCreateVReg(U);
2498   Register Val = getOrCreateVReg(*U.getOperand(0));
2499   const auto &TLI = *MF->getSubtarget().getTargetLowering();
2500   unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
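  // Bring the index to the target's preferred vector-index width: constant
  // indices are rebuilt at that width here, dynamic ones are sign-extended or
  // truncated below.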
2501   Register Idx;
2502   if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
2503     if (CI->getBitWidth() != PreferredVecIdxWidth) {
2504       APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
2505       auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
2506       Idx = getOrCreateVReg(*NewIdxCI);
2507     }
2508   }
2509   if (!Idx)
2510     Idx = getOrCreateVReg(*U.getOperand(1));
2511   if (MRI->getType(Idx).getSizeInBits() != PreferredVecIdxWidth) {
2512     const LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
2513     Idx = MIRBuilder.buildSExtOrTrunc(VecIdxTy, Idx).getReg(0);
2514   }
2515   MIRBuilder.buildExtractVectorElement(Res, Val, Idx);
2516   return true;
2517 }
2518 
2519 bool IRTranslator::translateShuffleVector(const User &U,
2520                                           MachineIRBuilder &MIRBuilder) {
2521   ArrayRef<int> Mask;
2522   if (auto *SVI = dyn_cast<ShuffleVectorInst>(&U))
2523     Mask = SVI->getShuffleMask();
2524   else
2525     Mask = cast<ConstantExpr>(U).getShuffleMask();
2526   ArrayRef<int> MaskAlloc = MF->allocateShuffleMask(Mask);
2527   MIRBuilder
2528       .buildInstr(TargetOpcode::G_SHUFFLE_VECTOR, {getOrCreateVReg(U)},
2529                   {getOrCreateVReg(*U.getOperand(0)),
2530                    getOrCreateVReg(*U.getOperand(1))})
2531       .addShuffleMask(MaskAlloc);
2532   return true;
2533 }
2534 
2535 bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
2536   const PHINode &PI = cast<PHINode>(U);
2537 
2538   SmallVector<MachineInstr *, 4> Insts;
2539   for (auto Reg : getOrCreateVRegs(PI)) {
2540     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_PHI, {Reg}, {});
2541     Insts.push_back(MIB.getInstr());
2542   }
2543 
2544   PendingPHIs.emplace_back(&PI, std::move(Insts));
2545   return true;
2546 }
2547 
2548 bool IRTranslator::translateAtomicCmpXchg(const User &U,
2549                                           MachineIRBuilder &MIRBuilder) {
2550   const AtomicCmpXchgInst &I = cast<AtomicCmpXchgInst>(U);
2551 
2552   auto &TLI = *MF->getSubtarget().getTargetLowering();
2553   auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
2554 
2555   Type *ResType = I.getType();
2556   Type *ValType = ResType->getStructElementType(0);
2557 
2558   auto Res = getOrCreateVRegs(I);
2559   Register OldValRes = Res[0];
2560   Register SuccessRes = Res[1];
2561   Register Addr = getOrCreateVReg(*I.getPointerOperand());
2562   Register Cmp = getOrCreateVReg(*I.getCompareOperand());
2563   Register NewVal = getOrCreateVReg(*I.getNewValOperand());
2564 
2565   AAMDNodes AAMetadata;
2566   I.getAAMetadata(AAMetadata);
2567 
2568   MIRBuilder.buildAtomicCmpXchgWithSuccess(
2569       OldValRes, SuccessRes, Addr, Cmp, NewVal,
2570       *MF->getMachineMemOperand(
2571           MachinePointerInfo(I.getPointerOperand()), Flags,
2572           DL->getTypeStoreSize(ValType), getMemOpAlign(I), AAMetadata, nullptr,
2573           I.getSyncScopeID(), I.getSuccessOrdering(), I.getFailureOrdering()));
2574   return true;
2575 }
2576 
2577 bool IRTranslator::translateAtomicRMW(const User &U,
2578                                       MachineIRBuilder &MIRBuilder) {
2579   const AtomicRMWInst &I = cast<AtomicRMWInst>(U);
2580   auto &TLI = *MF->getSubtarget().getTargetLowering();
2581   auto Flags = TLI.getAtomicMemOperandFlags(I, *DL);
2582 
2583   Type *ResType = I.getType();
2584 
2585   Register Res = getOrCreateVReg(I);
2586   Register Addr = getOrCreateVReg(*I.getPointerOperand());
2587   Register Val = getOrCreateVReg(*I.getValOperand());
2588 
2589   unsigned Opcode = 0;
2590   switch (I.getOperation()) {
2591   default:
2592     return false;
2593   case AtomicRMWInst::Xchg:
2594     Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
2595     break;
2596   case AtomicRMWInst::Add:
2597     Opcode = TargetOpcode::G_ATOMICRMW_ADD;
2598     break;
2599   case AtomicRMWInst::Sub:
2600     Opcode = TargetOpcode::G_ATOMICRMW_SUB;
2601     break;
2602   case AtomicRMWInst::And:
2603     Opcode = TargetOpcode::G_ATOMICRMW_AND;
2604     break;
2605   case AtomicRMWInst::Nand:
2606     Opcode = TargetOpcode::G_ATOMICRMW_NAND;
2607     break;
2608   case AtomicRMWInst::Or:
2609     Opcode = TargetOpcode::G_ATOMICRMW_OR;
2610     break;
2611   case AtomicRMWInst::Xor:
2612     Opcode = TargetOpcode::G_ATOMICRMW_XOR;
2613     break;
2614   case AtomicRMWInst::Max:
2615     Opcode = TargetOpcode::G_ATOMICRMW_MAX;
2616     break;
2617   case AtomicRMWInst::Min:
2618     Opcode = TargetOpcode::G_ATOMICRMW_MIN;
2619     break;
2620   case AtomicRMWInst::UMax:
2621     Opcode = TargetOpcode::G_ATOMICRMW_UMAX;
2622     break;
2623   case AtomicRMWInst::UMin:
2624     Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
2625     break;
2626   case AtomicRMWInst::FAdd:
2627     Opcode = TargetOpcode::G_ATOMICRMW_FADD;
2628     break;
2629   case AtomicRMWInst::FSub:
2630     Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
2631     break;
2632   }
2633 
2634   AAMDNodes AAMetadata;
2635   I.getAAMetadata(AAMetadata);
2636 
2637   MIRBuilder.buildAtomicRMW(
2638       Opcode, Res, Addr, Val,
2639       *MF->getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
2640                                 Flags, DL->getTypeStoreSize(ResType),
2641                                 getMemOpAlign(I), AAMetadata, nullptr,
2642                                 I.getSyncScopeID(), I.getOrdering()));
2643   return true;
2644 }
2645 
2646 bool IRTranslator::translateFence(const User &U,
2647                                   MachineIRBuilder &MIRBuilder) {
2648   const FenceInst &Fence = cast<FenceInst>(U);
2649   MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
2650                         Fence.getSyncScopeID());
2651   return true;
2652 }
2653 
2654 bool IRTranslator::translateFreeze(const User &U,
2655                                    MachineIRBuilder &MIRBuilder) {
2656   const ArrayRef<Register> DstRegs = getOrCreateVRegs(U);
2657   const ArrayRef<Register> SrcRegs = getOrCreateVRegs(*U.getOperand(0));
2658 
2659   assert(DstRegs.size() == SrcRegs.size() &&
2660          "Freeze with different source and destination type?");
2661 
2662   for (unsigned I = 0; I < DstRegs.size(); ++I) {
2663     MIRBuilder.buildFreeze(DstRegs[I], SrcRegs[I]);
2664   }
2665 
2666   return true;
2667 }
2668 
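// All blocks have been translated, so every incoming value now has vregs;
// add (value vreg, predecessor MBB) operand pairs to each recorded component
// G_PHI, once per machine-level predecessor of the incoming IR edge.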
void IRTranslator::finishPendingPhis() {
#ifndef NDEBUG
  DILocationVerifier Verifier;
  GISelObserverWrapper WrapperObserver(&Verifier);
  RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
#endif // ifndef NDEBUG
  for (auto &Phi : PendingPHIs) {
    const PHINode *PI = Phi.first;
    ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
    MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
    EntryBuilder->setDebugLoc(PI->getDebugLoc());
#ifndef NDEBUG
    Verifier.setCurrentInst(PI);
#endif // ifndef NDEBUG

    SmallSet<const MachineBasicBlock *, 16> SeenPreds;
    for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
      auto IRPred = PI->getIncomingBlock(i);
      ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
      for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
        if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
          continue;
        SeenPreds.insert(Pred);
        for (unsigned j = 0; j < ValRegs.size(); ++j) {
          MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
          MIB.addUse(ValRegs[j]);
          MIB.addMBB(Pred);
        }
      }
    }
  }
}

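// A value is considered "split" when its type lowers to more than one LLT
// (typically aggregates); the optional Offsets vector receives the offset of
// each piece within the original value.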
bool IRTranslator::valueIsSplit(const Value &V,
                                SmallVectorImpl<uint64_t> *Offsets) {
  SmallVector<LLT, 4> SplitTys;
  if (Offsets && !Offsets->empty())
    Offsets->clear();
  computeValueLLTs(*DL, *V.getType(), SplitTys, Offsets);
  return SplitTys.size() > 1;
}

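// Dispatch on the IR opcode via Instruction.def: the macro below expands to
// e.g. "case Instruction::Add: return translateAdd(Inst, *CurBuilder.get());".
// Returning false marks the instruction as untranslatable so the caller can
// report the failure and, where allowed, fall back to SelectionDAG.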
bool IRTranslator::translate(const Instruction &Inst) {
  CurBuilder->setDebugLoc(Inst.getDebugLoc());
  // We only emit constants into the entry block from here. To prevent jumpy
  // debug behaviour, set the line to 0.
  if (const DebugLoc &DL = Inst.getDebugLoc())
    EntryBuilder->setDebugLoc(
        DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
  else
    EntryBuilder->setDebugLoc(DebugLoc());

  auto &TLI = *MF->getSubtarget().getTargetLowering();
  if (TLI.fallBackToDAGISel(Inst))
    return false;

  switch (Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
  case Instruction::OPCODE:                                                    \
    return translate##OPCODE(Inst, *CurBuilder.get());
#include "llvm/IR/Instruction.def"
  default:
    return false;
  }
}

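// Constants are materialized in the dedicated entry block so that a single
// definition dominates every use; e.g. an `i32 7` becomes one
// `G_CONSTANT i32 7` shared by all users.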
bool IRTranslator::translate(const Constant &C, Register Reg) {
  if (auto CI = dyn_cast<ConstantInt>(&C))
    EntryBuilder->buildConstant(Reg, *CI);
  else if (auto CF = dyn_cast<ConstantFP>(&C))
    EntryBuilder->buildFConstant(Reg, *CF);
  else if (isa<UndefValue>(C))
    EntryBuilder->buildUndef(Reg);
  else if (isa<ConstantPointerNull>(C))
    EntryBuilder->buildConstant(Reg, 0);
  else if (auto GV = dyn_cast<GlobalValue>(&C))
    EntryBuilder->buildGlobalValue(Reg, GV);
  else if (auto CAZ = dyn_cast<ConstantAggregateZero>(&C)) {
    if (!CAZ->getType()->isVectorTy())
      return false;
    // Return the scalar if it is a <1 x Ty> vector.
    if (CAZ->getNumElements() == 1)
      return translateCopy(C, *CAZ->getElementValue(0u), *EntryBuilder.get());
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
      Constant &Elt = *CAZ->getElementValue(i);
      Ops.push_back(getOrCreateVReg(Elt));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto CV = dyn_cast<ConstantDataVector>(&C)) {
    // Return the scalar if it is a <1 x Ty> vector.
    if (CV->getNumElements() == 1)
      return translateCopy(C, *CV->getElementAsConstant(0),
                           *EntryBuilder.get());
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CV->getNumElements(); ++i) {
      Constant &Elt = *CV->getElementAsConstant(i);
      Ops.push_back(getOrCreateVReg(Elt));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
    switch (CE->getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS)                                        \
  case Instruction::OPCODE:                                                    \
    return translate##OPCODE(*CE, *EntryBuilder.get());
#include "llvm/IR/Instruction.def"
    default:
      return false;
    }
  } else if (auto CV = dyn_cast<ConstantVector>(&C)) {
    if (CV->getNumOperands() == 1)
      return translateCopy(C, *CV->getOperand(0), *EntryBuilder.get());
    SmallVector<Register, 4> Ops;
    for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
      Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
    }
    EntryBuilder->buildBuildVector(Reg, Ops);
  } else if (auto *BA = dyn_cast<BlockAddress>(&C)) {
    EntryBuilder->buildBlockAddress(Reg, BA);
  } else
    return false;

  return true;
}

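// Emit the switch-lowering artifacts (bit-test blocks, jump tables and
// compare/branch cases) recorded while translating this block's terminator,
// then clear the work lists so the next block starts fresh.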
void IRTranslator::finalizeBasicBlock() {
  for (auto &BTB : SL->BitTestCases) {
    // Emit header first, if it wasn't already emitted.
    if (!BTB.Emitted)
      emitBitTestHeader(BTB, BTB.Parent);

    BranchProbability UnhandledProb = BTB.Prob;
    for (unsigned j = 0, ej = BTB.Cases.size(); j != ej; ++j) {
      UnhandledProb -= BTB.Cases[j].ExtraProb;
      // Set the current basic block to the MBB we wish to insert the code
      // into.
      MachineBasicBlock *MBB = BTB.Cases[j].ThisBB;
      // If all cases cover a contiguous range, it is not necessary to jump to
      // the default block after the last bit test fails. This is because the
      // range check during bit test header creation has guaranteed that every
      // case here doesn't go outside the range. In this case, there is no need
      // to perform the last bit test, as it will always be true. Instead, make
      // the second-to-last bit-test fall through to the target of the last bit
      // test, and delete the last bit test.
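      // For instance, once every earlier test has failed, the header's range
      // check guarantees the value must match the final test's mask, so that
      // test always succeeds and branching through it would be wasted work.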

      MachineBasicBlock *NextMBB;
      if (BTB.ContiguousRange && j + 2 == ej) {
        // Second-to-last bit-test with contiguous range: fall through to the
        // target of the final bit test.
        NextMBB = BTB.Cases[j + 1].TargetBB;
      } else if (j + 1 == ej) {
        // For the last bit test, fall through to Default.
        NextMBB = BTB.Default;
      } else {
        // Otherwise, fall through to the next bit test.
        NextMBB = BTB.Cases[j + 1].ThisBB;
      }

      emitBitTestCase(BTB, NextMBB, UnhandledProb, BTB.Reg, BTB.Cases[j], MBB);

      // FIXME delete this block below?
      if (BTB.ContiguousRange && j + 2 == ej) {
        // Since we're not going to use the final bit test, remove it.
        BTB.Cases.pop_back();
        break;
      }
    }
    // This is the "default" BB. There are two jumps to it: from the "header"
    // BB and from the last "case" BB, unless the latter was skipped.
    CFGEdge HeaderToDefaultEdge = {BTB.Parent->getBasicBlock(),
                                   BTB.Default->getBasicBlock()};
    addMachineCFGPred(HeaderToDefaultEdge, BTB.Parent);
    if (!BTB.ContiguousRange) {
      addMachineCFGPred(HeaderToDefaultEdge, BTB.Cases.back().ThisBB);
    }
  }
  SL->BitTestCases.clear();

  for (auto &JTCase : SL->JTCases) {
    // Emit header first, if it wasn't already emitted.
    if (!JTCase.first.Emitted)
      emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);

    emitJumpTable(JTCase.second, JTCase.second.MBB);
  }
  SL->JTCases.clear();

  for (auto &SwCase : SL->SwitchCases)
    emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
  SL->SwitchCases.clear();
}

void IRTranslator::finalizeFunction() {
  // Release the memory used by the different maps we
  // needed during the translation.
  PendingPHIs.clear();
  VMap.reset();
  FrameIndices.clear();
  MachinePreds.clear();
  // MachineIRBuilder::DebugLoc can outlive the DILocation it holds. Clear it
  // to avoid accessing freed memory (in runOnMachineFunction) and to avoid
  // destroying it twice (in ~IRTranslator() and ~LLVMContext()).
  EntryBuilder.reset();
  CurBuilder.reset();
  FuncInfo.clear();
}

/// Returns true if a BasicBlock \p BB within a variadic function contains a
/// variadic musttail call.
static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
  if (!IsVarArg)
    return false;

  // Walk the block backwards, because tail calls usually only appear at the
  // end of a block.
  return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
    const auto *CI = dyn_cast<CallInst>(&I);
    return CI && CI->isMustTailCall();
  });
}

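// Pass entry point: create one MachineBasicBlock per IR block (plus a
// dedicated block for arguments and constants), lower the formal arguments,
// translate every block in reverse post-order, then finish the pending PHIs
// and fold the constants block into the real entry block.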
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
  MF = &CurMF;
  const Function &F = MF->getFunction();
  if (F.empty())
    return false;
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  // Set the CSEConfig and run the analysis.
  GISelCSEInfo *CSEInfo = nullptr;
  TPC = &getAnalysis<TargetPassConfig>();
  bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
                       ? EnableCSEInIRTranslator
                       : TPC->isGISelCSEEnabled();

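  // When CSE is enabled, the constants builder and the per-block builder share
  // the same CSE info, so equivalent instructions built through either of them
  // can be de-duplicated.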
  if (EnableCSE) {
    EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CSEInfo = &Wrapper.get(TPC->getCSEConfig());
    EntryBuilder->setCSEInfo(CSEInfo);
    CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
    CurBuilder->setCSEInfo(CSEInfo);
  } else {
    EntryBuilder = std::make_unique<MachineIRBuilder>();
    CurBuilder = std::make_unique<MachineIRBuilder>();
  }
  CLI = MF->getSubtarget().getCallLowering();
  CurBuilder->setMF(*MF);
  EntryBuilder->setMF(*MF);
  MRI = &MF->getRegInfo();
  DL = &F.getParent()->getDataLayout();
  ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
  const TargetMachine &TM = MF->getTarget();
  TM.resetTargetOptions(F);
  EnableOpts = OptLevel != CodeGenOpt::None && !skipFunction(F);
  FuncInfo.MF = MF;
  if (EnableOpts)
    FuncInfo.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
  else
    FuncInfo.BPI = nullptr;

  const auto &TLI = *MF->getSubtarget().getTargetLowering();

  SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
  SL->init(TLI, TM, *DL);

  assert(PendingPHIs.empty() && "stale PHIs");

  if (!DL->isLittleEndian()) {
    // Currently we don't properly handle big endian code.
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to translate in big endian mode";
    reportTranslationError(*MF, *TPC, *ORE, R);
  }

  // Release the per-function state when we return, whether we succeeded or not.
  auto FinalizeOnReturn = make_scope_exit([this]() { finalizeFunction(); });

  // Set up a separate basic block for the arguments and constants.
  MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
  MF->push_back(EntryBB);
  EntryBuilder->setMBB(*EntryBB);

  DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
  SwiftError.setFunction(CurMF);
  SwiftError.createEntriesInEntryBlock(DbgLoc);

  bool IsVarArg = F.isVarArg();
  bool HasMustTailInVarArgFn = false;

  // Create all blocks, in IR order, to preserve the layout.
  for (const BasicBlock &BB : F) {
    auto *&MBB = BBToMBB[&BB];

    MBB = MF->CreateMachineBasicBlock(&BB);
    MF->push_back(MBB);

    if (BB.hasAddressTaken())
      MBB->setHasAddressTaken();

    if (!HasMustTailInVarArgFn)
      HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
  }

  MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);

  // Make our arguments/constants entry block fall through to the IR entry
  // block.
  EntryBB->addSuccessor(&getMBB(F.front()));

  if (CLI->fallBackToDAGISel(F)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower function: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Lower the actual args into this basic block.
  SmallVector<ArrayRef<Register>, 8> VRegArgs;
  for (const Argument &Arg : F.args()) {
    if (DL->getTypeStoreSize(Arg.getType()).isZero())
      continue; // Don't handle zero sized types.
    ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
    VRegArgs.push_back(VRegs);

    if (Arg.hasSwiftErrorAttr()) {
      assert(VRegs.size() == 1 && "Too many vregs for Swift error");
      SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
    }
  }

  if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
    OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                               F.getSubprogram(), &F.getEntryBlock());
    R << "unable to lower arguments: " << ore::NV("Prototype", F.getType());
    reportTranslationError(*MF, *TPC, *ORE, R);
    return false;
  }

  // Need to visit defs before uses when translating instructions.
  GISelObserverWrapper WrapperObserver;
  if (EnableCSE && CSEInfo)
    WrapperObserver.addObserver(CSEInfo);
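  // The observer stays installed for the whole walk below, so every
  // instruction that gets built is reported to the CSE info and, in debug
  // builds, checked by the DILocation verifier.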
  {
    ReversePostOrderTraversal<const Function *> RPOT(&F);
#ifndef NDEBUG
    DILocationVerifier Verifier;
    WrapperObserver.addObserver(&Verifier);
#endif // ifndef NDEBUG
    RAIIDelegateInstaller DelInstall(*MF, &WrapperObserver);
    RAIIMFObserverInstaller ObsInstall(*MF, WrapperObserver);
    for (const BasicBlock *BB : RPOT) {
      MachineBasicBlock &MBB = getMBB(*BB);
      // Set the insertion point of all the following translations to
      // the end of this basic block.
      CurBuilder->setMBB(MBB);
      HasTailCall = false;
      for (const Instruction &Inst : *BB) {
        // If we translated a tail call in the last step, then we know
        // everything after the call is either a return, or something that is
        // handled by the call itself. (E.g. a lifetime marker or assume
        // intrinsic.) In this case, we should stop translating the block and
        // move on.
        if (HasTailCall)
          break;
#ifndef NDEBUG
        Verifier.setCurrentInst(&Inst);
#endif // ifndef NDEBUG
        if (translate(Inst))
          continue;

        OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
                                   Inst.getDebugLoc(), BB);
        R << "unable to translate instruction: " << ore::NV("Opcode", &Inst);

        if (ORE->allowExtraAnalysis("gisel-irtranslator")) {
          std::string InstStrStorage;
          raw_string_ostream InstStr(InstStrStorage);
          InstStr << Inst;

          R << ": '" << InstStr.str() << "'";
        }

        reportTranslationError(*MF, *TPC, *ORE, R);
        return false;
      }

      finalizeBasicBlock();
    }
#ifndef NDEBUG
    WrapperObserver.removeObserver(&Verifier);
#endif
  }

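  // Only now that every block has been translated do all PHI incoming values
  // have virtual registers, so the deferred PHIs can finally be completed.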
  finishPendingPhis();

  SwiftError.propagateVRegs();

  // Merge the argument lowering and constants block with its single
  // successor, the LLVM-IR entry block.  We want the basic block to
  // be maximal.
  assert(EntryBB->succ_size() == 1 &&
         "Custom BB used for lowering should have only one successor");
  // Get the successor of the current entry block.
  MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
  assert(NewEntryBB.pred_size() == 1 &&
         "LLVM-IR entry block has a predecessor!?");
  // Move all the instructions from the current entry block to the
  // new entry block.
  NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
                    EntryBB->end());

  // Update the live-in information for the new entry block.
  for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
    NewEntryBB.addLiveIn(LiveIn);
  NewEntryBB.sortUniqueLiveIns();

  // Get rid of the now empty basic block.
  EntryBB->removeSuccessor(&NewEntryBB);
  MF->remove(EntryBB);
  MF->DeleteMachineBasicBlock(EntryBB);

  assert(&MF->front() == &NewEntryBB &&
         "New entry wasn't next in the list of basic blocks!");

  // Initialize stack protector information.
  StackProtector &SP = getAnalysis<StackProtector>();
  SP.copyToMachineFrameInfo(MF->getFrameInfo());

  return false;
}