1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "MCTargetDesc/PPCMCTargetDesc.h"
16 #include "MCTargetDesc/PPCPredicates.h"
17 #include "PPC.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "PPCTargetMachine.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/BranchProbabilityInfo.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/SelectionDAG.h"
36 #include "llvm/CodeGen/SelectionDAGISel.h"
37 #include "llvm/CodeGen/SelectionDAGNodes.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 #include "llvm/CodeGen/ValueTypes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/GlobalValue.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/KnownBits.h"
55 #include "llvm/Support/MachineValueType.h"
56 #include "llvm/Support/MathExtras.h"
57 #include "llvm/Support/raw_ostream.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <limits>
63 #include <memory>
64 #include <new>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "ppc-codegen"
71 
72 STATISTIC(NumSextSetcc,
73           "Number of (sext(setcc)) nodes expanded into GPR sequence.");
74 STATISTIC(NumZextSetcc,
75           "Number of (zext(setcc)) nodes expanded into GPR sequence.");
76 STATISTIC(SignExtensionsAdded,
77           "Number of sign extensions for compare inputs added.");
78 STATISTIC(ZeroExtensionsAdded,
79           "Number of zero extensions for compare inputs added.");
80 STATISTIC(NumLogicOpsOnComparison,
81           "Number of logical ops on i1 values calculated in GPR.");
82 STATISTIC(OmittedForNonExtendUses,
83           "Number of compares not eliminated as they have non-extending uses.");
84 
85 // FIXME: Remove this once the bug has been fixed!
86 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
87 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
88 
89 static cl::opt<bool>
90     UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
91                        cl::desc("use aggressive ppc isel for bit permutations"),
92                        cl::Hidden);
93 static cl::opt<bool> BPermRewriterNoMasking(
94     "ppc-bit-perm-rewriter-stress-rotates",
95     cl::desc("stress rotate selection in aggressive ppc isel for "
96              "bit permutations"),
97     cl::Hidden);
98 
99 static cl::opt<bool> EnableBranchHint(
100   "ppc-use-branch-hint", cl::init(true),
101     cl::desc("Enable static hinting of branches on ppc"),
102     cl::Hidden);
103 
104 static cl::opt<bool> EnableTLSOpt(
105   "ppc-tls-opt", cl::init(true),
106     cl::desc("Enable tls optimization peephole"),
107     cl::Hidden);
108 
109 enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
110   ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
111   ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
112 
113 static cl::opt<ICmpInGPRType> CmpInGPR(
114   "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
115   cl::desc("Specify the types of comparisons to emit GPR-only code for."),
116   cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
117              clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
118              clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
119              clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
120              clEnumValN(ICGPR_NonExtIn, "nonextin",
121                         "Only comparisons where inputs don't need [sz]ext."),
122              clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
123              clEnumValN(ICGPR_ZextI32, "zexti32",
124                         "Only i32 comparisons with zext result."),
125              clEnumValN(ICGPR_ZextI64, "zexti64",
126                         "Only i64 comparisons with zext result."),
127              clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
128              clEnumValN(ICGPR_SextI32, "sexti32",
129                         "Only i32 comparisons with sext result."),
130              clEnumValN(ICGPR_SextI64, "sexti64",
131                         "Only i64 comparisons with sext result.")));
132 namespace {
133 
134   //===--------------------------------------------------------------------===//
135   /// PPCDAGToDAGISel - PPC specific code to select PPC machine
136   /// instructions for SelectionDAG operations.
137   ///
138   class PPCDAGToDAGISel : public SelectionDAGISel {
139     const PPCTargetMachine &TM;
140     const PPCSubtarget *PPCSubTarget;
141     const PPCTargetLowering *PPCLowering;
142     unsigned GlobalBaseReg;
143 
144   public:
145     explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
146         : SelectionDAGISel(tm, OptLevel), TM(tm) {}
147 
148     bool runOnMachineFunction(MachineFunction &MF) override {
149       // Make sure we re-emit a set of the global base reg if necessary
150       GlobalBaseReg = 0;
151       PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
152       PPCLowering = PPCSubTarget->getTargetLowering();
153       SelectionDAGISel::runOnMachineFunction(MF);
154 
155       if (!PPCSubTarget->isSVR4ABI())
156         InsertVRSaveCode(MF);
157 
158       return true;
159     }
160 
161     void PreprocessISelDAG() override;
162     void PostprocessISelDAG() override;
163 
164     /// getI16Imm - Return a target constant with the specified value, of type
165     /// i16.
166     inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
167       return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
168     }
169 
170     /// getI32Imm - Return a target constant with the specified value, of type
171     /// i32.
172     inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
173       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
174     }
175 
176     /// getI64Imm - Return a target constant with the specified value, of type
177     /// i64.
178     inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
179       return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
180     }
181 
182     /// getSmallIPtrImm - Return a target constant of pointer type.
183     inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
184       return CurDAG->getTargetConstant(
185           Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
186     }
187 
188     /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
189     /// rotate and mask opcode and mask operation.
190     static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
191                                 unsigned &SH, unsigned &MB, unsigned &ME);
192 
193     /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
194     /// base register.  Return the virtual register that holds this value.
195     SDNode *getGlobalBaseReg();
196 
197     void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
198 
199     // Select - Convert the specified operand from a target-independent to a
200     // target-specific node if it hasn't already been changed.
201     void Select(SDNode *N) override;
202 
203     bool tryBitfieldInsert(SDNode *N);
204     bool tryBitPermutation(SDNode *N);
205     bool tryIntCompareInGPR(SDNode *N);
206 
207     // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
208     // an X-Form load instruction with the offset being a relocation coming from
209     // the PPCISD::ADD_TLS.
210     bool tryTLSXFormLoad(LoadSDNode *N);
211     // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
212     // an X-Form store instruction with the offset being a relocation coming from
213     // the PPCISD::ADD_TLS.
214     bool tryTLSXFormStore(StoreSDNode *N);
215     /// SelectCC - Select a comparison of the specified values with the
216     /// specified condition code, returning the CR# of the expression.
217     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
218                      const SDLoc &dl);
219 
220     /// SelectAddrImm - Returns true if the address N can be represented by
221     /// a base register plus a signed 16-bit displacement [r+imm].
222     bool SelectAddrImm(SDValue N, SDValue &Disp,
223                        SDValue &Base) {
224       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
225     }
226 
227     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
228     /// immediate field.  Note that the operand at this point is already the
229     /// result of a prior SelectAddressRegImm call.
230     bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
231       if (N.getOpcode() == ISD::TargetConstant ||
232           N.getOpcode() == ISD::TargetGlobalAddress) {
233         Out = N;
234         return true;
235       }
236 
237       return false;
238     }
239 
240     /// SelectAddrIdx - Given the specified addressed, check to see if it can be
241     /// represented as an indexed [r+r] operation.  Returns false if it can
242     /// be represented by [r+imm], which are preferred.
243     bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
244       return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
245     }
246 
247     /// SelectAddrIdxOnly - Given the specified addressed, force it to be
248     /// represented as an indexed [r+r] operation.
249     bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
250       return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
251     }
252 
253     /// SelectAddrImmX4 - Returns true if the address N can be represented by
254     /// a base register plus a signed 16-bit displacement that is a multiple of 4.
255     /// Suitable for use by STD and friends.
256     bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
257       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
258     }
259 
260     bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
261       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
262     }
263 
264     // Select an address into a single register.
265     bool SelectAddr(SDValue N, SDValue &Base) {
266       Base = N;
267       return true;
268     }
269 
270     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
271     /// inline asm expressions.  It is always correct to compute the value into
272     /// a register.  The case of adding a (possibly relocatable) constant to a
273     /// register can be improved, but it is wrong to substitute Reg+Reg for
274     /// Reg in an asm, because the load or store opcode would have to change.
275     bool SelectInlineAsmMemoryOperand(const SDValue &Op,
276                                       unsigned ConstraintID,
277                                       std::vector<SDValue> &OutOps) override {
278       switch(ConstraintID) {
279       default:
280         errs() << "ConstraintID: " << ConstraintID << "\n";
281         llvm_unreachable("Unexpected asm memory constraint");
282       case InlineAsm::Constraint_es:
283       case InlineAsm::Constraint_i:
284       case InlineAsm::Constraint_m:
285       case InlineAsm::Constraint_o:
286       case InlineAsm::Constraint_Q:
287       case InlineAsm::Constraint_Z:
288       case InlineAsm::Constraint_Zy:
289         // We need to make sure that this one operand does not end up in r0
290         // (because we might end up lowering this as 0(%op)).
291         const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
292         const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
293         SDLoc dl(Op);
294         SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
295         SDValue NewOp =
296           SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
297                                          dl, Op.getValueType(),
298                                          Op, RC), 0);
299 
300         OutOps.push_back(NewOp);
301         return false;
302       }
303       return true;
304     }
305 
306     void InsertVRSaveCode(MachineFunction &MF);
307 
308     StringRef getPassName() const override {
309       return "PowerPC DAG->DAG Pattern Instruction Selection";
310     }
311 
312 // Include the pieces autogenerated from the target description.
313 #include "PPCGenDAGISel.inc"
314 
315 private:
316     bool trySETCC(SDNode *N);
317 
318     void PeepholePPC64();
319     void PeepholePPC64ZExt();
320     void PeepholeCROps();
321 
322     SDValue combineToCMPB(SDNode *N);
323     void foldBoolExts(SDValue &Res, SDNode *&N);
324 
325     bool AllUsersSelectZero(SDNode *N);
326     void SwapAllSelectUsers(SDNode *N);
327 
328     bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
329     void transferMemOperands(SDNode *N, SDNode *Result);
330   };
331 
332 } // end anonymous namespace
333 
334 /// InsertVRSaveCode - Once the entire function has been instruction selected,
335 /// all virtual registers are created and all machine instructions are built,
336 /// check to see if we need to save/restore VRSAVE.  If so, do it.
337 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
338   // Check to see if this function uses vector registers, which means we have to
339   // save and restore the VRSAVE register and update it with the regs we use.
340   //
341   // In this case, there will be virtual registers of vector type created
342   // by the scheduler.  Detect them now.
343   bool HasVectorVReg = false;
344   for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
345     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
346     if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
347       HasVectorVReg = true;
348       break;
349     }
350   }
351   if (!HasVectorVReg) return;  // nothing to do.
352 
353   // If we have a vector register, we want to emit code into the entry and exit
354   // blocks to save and restore the VRSAVE register.  We do this here (instead
355   // of marking all vector instructions as clobbering VRSAVE) for two reasons:
356   //
357   // 1. This (trivially) reduces the load on the register allocator, by not
358   //    having to represent the live range of the VRSAVE register.
359   // 2. This (more significantly) allows us to create a temporary virtual
360   //    register to hold the saved VRSAVE value, allowing this temporary to be
361   //    register allocated, instead of forcing it to be spilled to the stack.
362 
363   // Create two vregs - one to hold the VRSAVE register that is live-in to the
364   // function and one for the value after having bits or'd into it.
365   unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
366   unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
367 
368   const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
369   MachineBasicBlock &EntryBB = *Fn.begin();
370   DebugLoc dl;
371   // Emit the following code into the entry block:
372   // InVRSAVE = MFVRSAVE
373   // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
374   // MTVRSAVE UpdatedVRSAVE
375   MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
376   BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
377   BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
378           UpdatedVRSAVE).addReg(InVRSAVE);
379   BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
380 
381   // Find all return blocks, outputting a restore in each epilog.
382   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
383     if (BB->isReturnBlock()) {
384       IP = BB->end(); --IP;
385 
386       // Skip over all terminator instructions, which are part of the return
387       // sequence.
388       MachineBasicBlock::iterator I2 = IP;
389       while (I2 != BB->begin() && (--I2)->isTerminator())
390         IP = I2;
391 
392       // Emit: MTVRSAVE InVRSave
393       BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
394     }
395   }
396 }
397 
398 /// getGlobalBaseReg - Output the instructions required to put the
399 /// base address to use for accessing globals into a register.
400 ///
401 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
402   if (!GlobalBaseReg) {
403     const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
404     // Insert the set of GlobalBaseReg into the first MBB of the function
405     MachineBasicBlock &FirstMBB = MF->front();
406     MachineBasicBlock::iterator MBBI = FirstMBB.begin();
407     const Module *M = MF->getFunction().getParent();
408     DebugLoc dl;
409 
410     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
411       if (PPCSubTarget->isTargetELF()) {
412         GlobalBaseReg = PPC::R30;
413         if (M->getPICLevel() == PICLevel::SmallPIC) {
414           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
415           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
416           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
417         } else {
418           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
419           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
420           unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
421           BuildMI(FirstMBB, MBBI, dl,
422                   TII.get(PPC::UpdateGBR), GlobalBaseReg)
423                   .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
424           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
425         }
426       } else {
427         GlobalBaseReg =
428           RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
429         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
430         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
431       }
432     } else {
433       // We must ensure that this sequence is dominated by the prologue.
434       // FIXME: This is a bit of a big hammer since we don't get the benefits
435       // of shrink-wrapping whenever we emit this instruction. Considering
436       // this is used in any function where we emit a jump table, this may be
437       // a significant limitation. We should consider inserting this in the
438       // block where it is used and then commoning this sequence up if it
439       // appears in multiple places.
440       // Note: on ISA 3.0 cores, we can use lnia (addpcis) insteand of
441       // MovePCtoLR8.
442       MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
443       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
444       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
445       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
446     }
447   }
448   return CurDAG->getRegister(GlobalBaseReg,
449                              PPCLowering->getPointerTy(CurDAG->getDataLayout()))
450       .getNode();
451 }
452 
453 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
454 /// operand. If so Imm will receive the 32-bit value.
455 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
456   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
457     Imm = cast<ConstantSDNode>(N)->getZExtValue();
458     return true;
459   }
460   return false;
461 }
462 
463 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
464 /// operand.  If so Imm will receive the 64-bit value.
465 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
466   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
467     Imm = cast<ConstantSDNode>(N)->getZExtValue();
468     return true;
469   }
470   return false;
471 }
472 
473 // isInt32Immediate - This method tests to see if a constant operand.
474 // If so Imm will receive the 32 bit value.
475 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
476   return isInt32Immediate(N.getNode(), Imm);
477 }
478 
479 /// isInt64Immediate - This method tests to see if the value is a 64-bit
480 /// constant operand. If so Imm will receive the 64-bit value.
481 static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
482   return isInt64Immediate(N.getNode(), Imm);
483 }
484 
485 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
486                               const SDValue &DestMBB) {
487   assert(isa<BasicBlockSDNode>(DestMBB));
488 
489   if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
490 
491   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
492   const TerminatorInst *BBTerm = BB->getTerminator();
493 
494   if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
495 
496   const BasicBlock *TBB = BBTerm->getSuccessor(0);
497   const BasicBlock *FBB = BBTerm->getSuccessor(1);
498 
499   auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
500   auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
501 
502   // We only want to handle cases which are easy to predict at static time, e.g.
503   // C++ throw statement, that is very likely not taken, or calling never
504   // returned function, e.g. stdlib exit(). So we set Threshold to filter
505   // unwanted cases.
506   //
507   // Below is LLVM branch weight table, we only want to handle case 1, 2
508   //
509   // Case                  Taken:Nontaken  Example
510   // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
511   // 2. Invoke-terminating 1:1048575
512   // 3. Coldblock          4:64            __builtin_expect
513   // 4. Loop Branch        124:4           For loop
514   // 5. PH/ZH/FPH          20:12
515   const uint32_t Threshold = 10000;
516 
517   if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
518     return PPC::BR_NO_HINT;
519 
520   DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
521                << BB->getName() << "'\n"
522                << " -> " << TBB->getName() << ": " << TProb << "\n"
523                << " -> " << FBB->getName() << ": " << FProb << "\n");
524 
525   const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
526 
527   // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
528   // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
529   if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
530     std::swap(TProb, FProb);
531 
532   return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
533 }
534 
535 // isOpcWithIntImmediate - This method tests to see if the node is a specific
536 // opcode and that it has a immediate integer right operand.
537 // If so Imm will receive the 32 bit value.
538 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
539   return N->getOpcode() == Opc
540          && isInt32Immediate(N->getOperand(1).getNode(), Imm);
541 }
542 
543 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
544   SDLoc dl(SN);
545   int FI = cast<FrameIndexSDNode>(N)->getIndex();
546   SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
547   unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
548   if (SN->hasOneUse())
549     CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
550                          getSmallIPtrImm(Offset, dl));
551   else
552     ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
553                                            getSmallIPtrImm(Offset, dl)));
554 }
555 
556 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
557                                       bool isShiftMask, unsigned &SH,
558                                       unsigned &MB, unsigned &ME) {
559   // Don't even go down this path for i64, since different logic will be
560   // necessary for rldicl/rldicr/rldimi.
561   if (N->getValueType(0) != MVT::i32)
562     return false;
563 
564   unsigned Shift  = 32;
565   unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
566   unsigned Opcode = N->getOpcode();
567   if (N->getNumOperands() != 2 ||
568       !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
569     return false;
570 
571   if (Opcode == ISD::SHL) {
572     // apply shift left to mask if it comes first
573     if (isShiftMask) Mask = Mask << Shift;
574     // determine which bits are made indeterminant by shift
575     Indeterminant = ~(0xFFFFFFFFu << Shift);
576   } else if (Opcode == ISD::SRL) {
577     // apply shift right to mask if it comes first
578     if (isShiftMask) Mask = Mask >> Shift;
579     // determine which bits are made indeterminant by shift
580     Indeterminant = ~(0xFFFFFFFFu >> Shift);
581     // adjust for the left rotate
582     Shift = 32 - Shift;
583   } else if (Opcode == ISD::ROTL) {
584     Indeterminant = 0;
585   } else {
586     return false;
587   }
588 
589   // if the mask doesn't intersect any Indeterminant bits
590   if (Mask && !(Mask & Indeterminant)) {
591     SH = Shift & 31;
592     // make sure the mask is still a mask (wrap arounds may not be)
593     return isRunOfOnes(Mask, MB, ME);
594   }
595   return false;
596 }
597 
598 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
599   SDValue Base = ST->getBasePtr();
600   if (Base.getOpcode() != PPCISD::ADD_TLS)
601     return false;
602   SDValue Offset = ST->getOffset();
603   if (!Offset.isUndef())
604     return false;
605 
606   SDLoc dl(ST);
607   EVT MemVT = ST->getMemoryVT();
608   EVT RegVT = ST->getValue().getValueType();
609 
610   unsigned Opcode;
611   switch (MemVT.getSimpleVT().SimpleTy) {
612     default:
613       return false;
614     case MVT::i8: {
615       Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
616       break;
617     }
618     case MVT::i16: {
619       Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
620       break;
621     }
622     case MVT::i32: {
623       Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
624       break;
625     }
626     case MVT::i64: {
627       Opcode = PPC::STDXTLS;
628       break;
629     }
630   }
631   SDValue Chain = ST->getChain();
632   SDVTList VTs = ST->getVTList();
633   SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
634                    Chain};
635   SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
636   transferMemOperands(ST, MN);
637   ReplaceNode(ST, MN);
638   return true;
639 }
640 
641 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
642   SDValue Base = LD->getBasePtr();
643   if (Base.getOpcode() != PPCISD::ADD_TLS)
644     return false;
645   SDValue Offset = LD->getOffset();
646   if (!Offset.isUndef())
647     return false;
648 
649   SDLoc dl(LD);
650   EVT MemVT = LD->getMemoryVT();
651   EVT RegVT = LD->getValueType(0);
652   unsigned Opcode;
653   switch (MemVT.getSimpleVT().SimpleTy) {
654     default:
655       return false;
656     case MVT::i8: {
657       Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
658       break;
659     }
660     case MVT::i16: {
661       Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
662       break;
663     }
664     case MVT::i32: {
665       Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
666       break;
667     }
668     case MVT::i64: {
669       Opcode = PPC::LDXTLS;
670       break;
671     }
672   }
673   SDValue Chain = LD->getChain();
674   SDVTList VTs = LD->getVTList();
675   SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
676   SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
677   transferMemOperands(LD, MN);
678   ReplaceNode(LD, MN);
679   return true;
680 }
681 
682 /// Turn an or of two masked values into the rotate left word immediate then
683 /// mask insert (rlwimi) instruction.
684 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
685   SDValue Op0 = N->getOperand(0);
686   SDValue Op1 = N->getOperand(1);
687   SDLoc dl(N);
688 
689   KnownBits LKnown, RKnown;
690   CurDAG->computeKnownBits(Op0, LKnown);
691   CurDAG->computeKnownBits(Op1, RKnown);
692 
693   unsigned TargetMask = LKnown.Zero.getZExtValue();
694   unsigned InsertMask = RKnown.Zero.getZExtValue();
695 
696   if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
697     unsigned Op0Opc = Op0.getOpcode();
698     unsigned Op1Opc = Op1.getOpcode();
699     unsigned Value, SH = 0;
700     TargetMask = ~TargetMask;
701     InsertMask = ~InsertMask;
702 
703     // If the LHS has a foldable shift and the RHS does not, then swap it to the
704     // RHS so that we can fold the shift into the insert.
705     if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
706       if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
707           Op0.getOperand(0).getOpcode() == ISD::SRL) {
708         if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
709             Op1.getOperand(0).getOpcode() != ISD::SRL) {
710           std::swap(Op0, Op1);
711           std::swap(Op0Opc, Op1Opc);
712           std::swap(TargetMask, InsertMask);
713         }
714       }
715     } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
716       if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
717           Op1.getOperand(0).getOpcode() != ISD::SRL) {
718         std::swap(Op0, Op1);
719         std::swap(Op0Opc, Op1Opc);
720         std::swap(TargetMask, InsertMask);
721       }
722     }
723 
724     unsigned MB, ME;
725     if (isRunOfOnes(InsertMask, MB, ME)) {
726       if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
727           isInt32Immediate(Op1.getOperand(1), Value)) {
728         Op1 = Op1.getOperand(0);
729         SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
730       }
731       if (Op1Opc == ISD::AND) {
732        // The AND mask might not be a constant, and we need to make sure that
733        // if we're going to fold the masking with the insert, all bits not
734        // know to be zero in the mask are known to be one.
735         KnownBits MKnown;
736         CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
737         bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
738 
739         unsigned SHOpc = Op1.getOperand(0).getOpcode();
740         if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
741             isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
742           // Note that Value must be in range here (less than 32) because
743           // otherwise there would not be any bits set in InsertMask.
744           Op1 = Op1.getOperand(0).getOperand(0);
745           SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
746         }
747       }
748 
749       SH &= 31;
750       SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
751                           getI32Imm(ME, dl) };
752       ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
753       return true;
754     }
755   }
756   return false;
757 }
758 
759 // Predict the number of instructions that would be generated by calling
760 // selectI64Imm(N).
761 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
762   // Assume no remaining bits.
763   unsigned Remainder = 0;
764   // Assume no shift required.
765   unsigned Shift = 0;
766 
767   // If it can't be represented as a 32 bit value.
768   if (!isInt<32>(Imm)) {
769     Shift = countTrailingZeros<uint64_t>(Imm);
770     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
771 
772     // If the shifted value fits 32 bits.
773     if (isInt<32>(ImmSh)) {
774       // Go with the shifted value.
775       Imm = ImmSh;
776     } else {
777       // Still stuck with a 64 bit value.
778       Remainder = Imm;
779       Shift = 32;
780       Imm >>= 32;
781     }
782   }
783 
784   // Intermediate operand.
785   unsigned Result = 0;
786 
787   // Handle first 32 bits.
788   unsigned Lo = Imm & 0xFFFF;
789 
790   // Simple value.
791   if (isInt<16>(Imm)) {
792     // Just the Lo bits.
793     ++Result;
794   } else if (Lo) {
795     // Handle the Hi bits and Lo bits.
796     Result += 2;
797   } else {
798     // Just the Hi bits.
799     ++Result;
800   }
801 
802   // If no shift, we're done.
803   if (!Shift) return Result;
804 
805   // If Hi word == Lo word,
806   // we can use rldimi to insert the Lo word into Hi word.
807   if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
808     ++Result;
809     return Result;
810   }
811 
812   // Shift for next step if the upper 32-bits were not zero.
813   if (Imm)
814     ++Result;
815 
816   // Add in the last bits as required.
817   if ((Remainder >> 16) & 0xFFFF)
818     ++Result;
819   if (Remainder & 0xFFFF)
820     ++Result;
821 
822   return Result;
823 }
824 
// Rotate the 64-bit value Imm left by R bit positions. R is taken modulo 64.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  // Reduce the amount first. A rotation by a multiple of 64 is the identity,
  // and the plain two-shift formula would shift by 64 in that case, which is
  // undefined behavior for a 64-bit operand.
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
828 
829 static unsigned selectI64ImmInstrCount(int64_t Imm) {
830   unsigned Count = selectI64ImmInstrCountDirect(Imm);
831 
832   // If the instruction count is 1 or 2, we do not need further analysis
833   // since rotate + load constant requires at least 2 instructions.
834   if (Count <= 2)
835     return Count;
836 
837   for (unsigned r = 1; r < 63; ++r) {
838     uint64_t RImm = Rot64(Imm, r);
839     unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
840     Count = std::min(Count, RCount);
841 
842     // See comments in selectI64Imm for an explanation of the logic below.
843     unsigned LS = findLastSet(RImm);
844     if (LS != r-1)
845       continue;
846 
847     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
848     uint64_t RImmWithOnes = RImm | OnesMask;
849 
850     RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
851     Count = std::min(Count, RCount);
852   }
853 
854   return Count;
855 }
856 
857 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
858 // (above) needs to be kept in sync with this function.
859 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
860                                   int64_t Imm) {
861   // Assume no remaining bits.
862   unsigned Remainder = 0;
863   // Assume no shift required.
864   unsigned Shift = 0;
865 
866   // If it can't be represented as a 32 bit value.
867   if (!isInt<32>(Imm)) {
868     Shift = countTrailingZeros<uint64_t>(Imm);
869     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
870 
871     // If the shifted value fits 32 bits.
872     if (isInt<32>(ImmSh)) {
873       // Go with the shifted value.
874       Imm = ImmSh;
875     } else {
876       // Still stuck with a 64 bit value.
877       Remainder = Imm;
878       Shift = 32;
879       Imm >>= 32;
880     }
881   }
882 
883   // Intermediate operand.
884   SDNode *Result;
885 
886   // Handle first 32 bits.
887   unsigned Lo = Imm & 0xFFFF;
888   unsigned Hi = (Imm >> 16) & 0xFFFF;
889 
890   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
891       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
892   };
893 
894   // Simple value.
895   if (isInt<16>(Imm)) {
896     uint64_t SextImm = SignExtend64(Lo, 16);
897     SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
898     // Just the Lo bits.
899     Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
900   } else if (Lo) {
901     // Handle the Hi bits.
902     unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
903     Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
904     // And Lo bits.
905     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
906                                     SDValue(Result, 0), getI32Imm(Lo));
907   } else {
908     // Just the Hi bits.
909     Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
910   }
911 
912   // If no shift, we're done.
913   if (!Shift) return Result;
914 
915   // If Hi word == Lo word,
916   // we can use rldimi to insert the Lo word into Hi word.
917   if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
918     SDValue Ops[] =
919       { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
920     return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
921   }
922 
923   // Shift for next step if the upper 32-bits were not zero.
924   if (Imm) {
925     Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
926                                     SDValue(Result, 0),
927                                     getI32Imm(Shift),
928                                     getI32Imm(63 - Shift));
929   }
930 
931   // Add in the last bits as required.
932   if ((Hi = (Remainder >> 16) & 0xFFFF)) {
933     Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
934                                     SDValue(Result, 0), getI32Imm(Hi));
935   }
936   if ((Lo = Remainder & 0xFFFF)) {
937     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
938                                     SDValue(Result, 0), getI32Imm(Lo));
939   }
940 
941   return Result;
942 }
943 
944 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
945                             int64_t Imm) {
946   unsigned Count = selectI64ImmInstrCountDirect(Imm);
947 
948   // If the instruction count is 1 or 2, we do not need further analysis
949   // since rotate + load constant requires at least 2 instructions.
950   if (Count <= 2)
951     return selectI64ImmDirect(CurDAG, dl, Imm);
952 
953   unsigned RMin = 0;
954 
955   int64_t MatImm;
956   unsigned MaskEnd;
957 
958   for (unsigned r = 1; r < 63; ++r) {
959     uint64_t RImm = Rot64(Imm, r);
960     unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
961     if (RCount < Count) {
962       Count = RCount;
963       RMin = r;
964       MatImm = RImm;
965       MaskEnd = 63;
966     }
967 
968     // If the immediate to generate has many trailing zeros, it might be
969     // worthwhile to generate a rotated value with too many leading ones
970     // (because that's free with li/lis's sign-extension semantics), and then
971     // mask them off after rotation.
972 
973     unsigned LS = findLastSet(RImm);
974     // We're adding (63-LS) higher-order ones, and we expect to mask them off
975     // after performing the inverse rotation by (64-r). So we need that:
976     //   63-LS == 64-r => LS == r-1
977     if (LS != r-1)
978       continue;
979 
980     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
981     uint64_t RImmWithOnes = RImm | OnesMask;
982 
983     RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
984     if (RCount < Count) {
985       Count = RCount;
986       RMin = r;
987       MatImm = RImmWithOnes;
988       MaskEnd = LS;
989     }
990   }
991 
992   if (!RMin)
993     return selectI64ImmDirect(CurDAG, dl, Imm);
994 
995   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
996       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
997   };
998 
999   SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
1000   return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
1001                                 getI32Imm(64 - RMin), getI32Imm(MaskEnd));
1002 }
1003 
1004 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
1005   unsigned MaxTruncation = 0;
1006   // Cannot use range-based for loop here as we need the actual use (i.e. we
1007   // need the operand number corresponding to the use). A range-based for
1008   // will unbox the use and provide an SDNode*.
1009   for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
1010        Use != UseEnd; ++Use) {
1011     unsigned Opc =
1012       Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
1013     switch (Opc) {
1014     default: return 0;
1015     case ISD::TRUNCATE:
1016       if (Use->isMachineOpcode())
1017         return 0;
1018       MaxTruncation =
1019         std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
1020       continue;
1021     case ISD::STORE: {
1022       if (Use->isMachineOpcode())
1023         return 0;
1024       StoreSDNode *STN = cast<StoreSDNode>(*Use);
1025       unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
1026       if (MemVTSize == 64 || Use.getOperandNo() != 0)
1027         return 0;
1028       MaxTruncation = std::max(MaxTruncation, MemVTSize);
1029       continue;
1030     }
1031     case PPC::STW8:
1032     case PPC::STWX8:
1033     case PPC::STWU8:
1034     case PPC::STWUX8:
1035       if (Use.getOperandNo() != 0)
1036         return 0;
1037       MaxTruncation = std::max(MaxTruncation, 32u);
1038       continue;
1039     case PPC::STH8:
1040     case PPC::STHX8:
1041     case PPC::STHU8:
1042     case PPC::STHUX8:
1043       if (Use.getOperandNo() != 0)
1044         return 0;
1045       MaxTruncation = std::max(MaxTruncation, 16u);
1046       continue;
1047     case PPC::STB8:
1048     case PPC::STBX8:
1049     case PPC::STBU8:
1050     case PPC::STBUX8:
1051       if (Use.getOperandNo() != 0)
1052         return 0;
1053       MaxTruncation = std::max(MaxTruncation, 8u);
1054       continue;
1055     }
1056   }
1057   return MaxTruncation;
1058 }
1059 
1060 // Select a 64-bit constant.
1061 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1062   SDLoc dl(N);
1063 
1064   // Get 64 bit value.
1065   int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1066   if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1067     uint64_t SextImm = SignExtend64(Imm, MinSize);
1068     SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1069     if (isInt<16>(SextImm))
1070       return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1071   }
1072   return selectI64Imm(CurDAG, dl, Imm);
1073 }
1074 
1075 namespace {
1076 
1077 class BitPermutationSelector {
1078   struct ValueBit {
1079     SDValue V;
1080 
1081     // The bit number in the value, using a convention where bit 0 is the
1082     // lowest-order bit.
1083     unsigned Idx;
1084 
1085     enum Kind {
1086       ConstZero,
1087       Variable
1088     } K;
1089 
1090     ValueBit(SDValue V, unsigned I, Kind K = Variable)
1091       : V(V), Idx(I), K(K) {}
1092     ValueBit(Kind K = Variable)
1093       : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
1094 
1095     bool isZero() const {
1096       return K == ConstZero;
1097     }
1098 
1099     bool hasValue() const {
1100       return K == Variable;
1101     }
1102 
1103     SDValue getValue() const {
1104       assert(hasValue() && "Cannot get the value of a constant bit");
1105       return V;
1106     }
1107 
1108     unsigned getValueBitIndex() const {
1109       assert(hasValue() && "Cannot get the value bit index of a constant bit");
1110       return Idx;
1111     }
1112   };
1113 
1114   // A bit group has the same underlying value and the same rotate factor.
1115   struct BitGroup {
1116     SDValue V;
1117     unsigned RLAmt;
1118     unsigned StartIdx, EndIdx;
1119 
1120     // This rotation amount assumes that the lower 32 bits of the quantity are
1121     // replicated in the high 32 bits by the rotation operator (which is done
1122     // by rlwinm and friends in 64-bit mode).
1123     bool Repl32;
1124     // Did converting to Repl32 == true change the rotation factor? If it did,
1125     // it decreased it by 32.
1126     bool Repl32CR;
1127     // Was this group coalesced after setting Repl32 to true?
1128     bool Repl32Coalesced;
1129 
1130     BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1131       : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1132         Repl32Coalesced(false) {
1133       DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
1134                       " [" << S << ", " << E << "]\n");
1135     }
1136   };
1137 
1138   // Information on each (Value, RLAmt) pair (like the number of groups
1139   // associated with each) used to choose the lowering method.
1140   struct ValueRotInfo {
1141     SDValue V;
1142     unsigned RLAmt = std::numeric_limits<unsigned>::max();
1143     unsigned NumGroups = 0;
1144     unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1145     bool Repl32 = false;
1146 
1147     ValueRotInfo() = default;
1148 
1149     // For sorting (in reverse order) by NumGroups, and then by
1150     // FirstGroupStartIdx.
1151     bool operator < (const ValueRotInfo &Other) const {
1152       // We need to sort so that the non-Repl32 come first because, when we're
1153       // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1154       // masking operation.
1155       if (Repl32 < Other.Repl32)
1156         return true;
1157       else if (Repl32 > Other.Repl32)
1158         return false;
1159       else if (NumGroups > Other.NumGroups)
1160         return true;
1161       else if (NumGroups < Other.NumGroups)
1162         return false;
1163       else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1164         return true;
1165       return false;
1166     }
1167   };
1168 
  // Memoization table for getValueBits(): maps an SDValue to the
  // (Interesting, Bits) result computed for it. Each entry is held through a
  // std::unique_ptr, presumably so that the result object keeps a stable
  // address while recursive getValueBits() calls add further entries to the
  // map.
  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  using ValueBitsMemoizer =
      DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  ValueBitsMemoizer Memoizer;

  // Describe, bit by bit, where the NumBits bits of V come from.
  //
  // Return a pair of bool and a SmallVector pointer to a memoization entry.
  // The bool is true if something interesting was deduced; it is false if
  // we're providing only a generic representation of V (or something else
  // likewise uninteresting for instruction selection) through the SmallVector.
  //
  // Constant-amount ROTL/SHL/SRL, AND with a constant mask, OR of operands
  // whose non-zero bits never overlap, and ZERO_EXTEND from i32 to i64 are
  // looked through recursively. Anything else is represented as "bit i of V".
  //
  // NOTE(review): constant shift/rotate amounts are not range-checked against
  // NumBits here; presumably larger amounts cannot reach this code. Confirm.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    // Return the cached result if V was already analyzed.
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    // Otherwise create the entry now and fill it in below. Interesting and
    // Bits refer to the object owned by the unique_ptr.
    ValueEntry.reset(new ValueBitsMemoizedValue());
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // Result bit i is operand bit (i - RotAmt), wrapping around NumBits.
        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // The upper bits come from the operand, moved up by ShiftAmt.
        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        // The low ShiftAmt bits are known zero.
        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // The lower bits come from the operand, moved down by ShiftAmt.
        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        // The high ShiftAmt bits are known zero.
        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);

        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

        // Bits kept by the mask come from the operand; the rest are zero.
        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      // The OR can be looked through only if, at every bit position, at least
      // one of the two operands is a known zero. Each result bit is then taken
      // from the other operand.
      bool AllDisjoint = true;
      for (unsigned i = 0; i < NumBits; ++i)
        if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          AllDisjoint = false;
          break;
        }

      // Overlapping bits: fall through to the generic representation below,
      // which overwrites every element of Bits.
      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      // Like AND, this is interesting only if the operand was.
      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      // The low 32 bits come from the operand.
      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // The remaining high bits are known zero.
      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
      }
    }

    // Generic representation: bit i of the result is bit i of V itself.
    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }
1298 
1299   // For each value (except the constant ones), compute the left-rotate amount
1300   // to get it from its original to final position.
1301   void computeRotationAmounts() {
1302     HasZeros = false;
1303     RLAmt.resize(Bits.size());
1304     for (unsigned i = 0; i < Bits.size(); ++i)
1305       if (Bits[i].hasValue()) {
1306         unsigned VBI = Bits[i].getValueBitIndex();
1307         if (i >= VBI)
1308           RLAmt[i] = i - VBI;
1309         else
1310           RLAmt[i] = Bits.size() - (VBI - i);
1311       } else if (Bits[i].isZero()) {
1312         HasZeros = true;
1313         RLAmt[i] = UINT32_MAX;
1314       } else {
1315         llvm_unreachable("Unknown value bit type");
1316       }
1317   }
1318 
1319   // Collect groups of consecutive bits with the same underlying value and
1320   // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1321   // they break up groups.
1322   void collectBitGroups(bool LateMask) {
1323     BitGroups.clear();
1324 
1325     unsigned LastRLAmt = RLAmt[0];
1326     SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1327     unsigned LastGroupStartIdx = 0;
1328     for (unsigned i = 1; i < Bits.size(); ++i) {
1329       unsigned ThisRLAmt = RLAmt[i];
1330       SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1331       if (LateMask && !ThisValue) {
1332         ThisValue = LastValue;
1333         ThisRLAmt = LastRLAmt;
1334         // If we're doing late masking, then the first bit group always starts
1335         // at zero (even if the first bits were zero).
1336         if (BitGroups.empty())
1337           LastGroupStartIdx = 0;
1338       }
1339 
1340       // If this bit has the same underlying value and the same rotate factor as
1341       // the last one, then they're part of the same group.
1342       if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1343         continue;
1344 
1345       if (LastValue.getNode())
1346         BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1347                                      i-1));
1348       LastRLAmt = ThisRLAmt;
1349       LastValue = ThisValue;
1350       LastGroupStartIdx = i;
1351     }
1352     if (LastValue.getNode())
1353       BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1354                                    Bits.size()-1));
1355 
1356     if (BitGroups.empty())
1357       return;
1358 
1359     // We might be able to combine the first and last groups.
1360     if (BitGroups.size() > 1) {
1361       // If the first and last groups are the same, then remove the first group
1362       // in favor of the last group, making the ending index of the last group
1363       // equal to the ending index of the to-be-removed first group.
1364       if (BitGroups[0].StartIdx == 0 &&
1365           BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1366           BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1367           BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1368         DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1369         BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1370         BitGroups.erase(BitGroups.begin());
1371       }
1372     }
1373   }
1374 
1375   // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1376   // associated with each. If there is a degeneracy, pick the one that occurs
1377   // first (in the final value).
1378   void collectValueRotInfo() {
1379     ValueRots.clear();
1380 
1381     for (auto &BG : BitGroups) {
1382       unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1383       ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1384       VRI.V = BG.V;
1385       VRI.RLAmt = BG.RLAmt;
1386       VRI.Repl32 = BG.Repl32;
1387       VRI.NumGroups += 1;
1388       VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1389     }
1390 
1391     // Now that we've collected the various ValueRotInfo instances, we need to
1392     // sort them.
1393     ValueRotsVec.clear();
1394     for (auto &I : ValueRots) {
1395       ValueRotsVec.push_back(I.second);
1396     }
1397     std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
1398   }
1399 
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible. The conversion is done in place on BitGroups (the
  // Repl32, Repl32CR and Repl32Coalesced flags record what happened); nothing
  // is returned.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
    // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // True if every value bit covered by BG comes from the low 32 bits of its
    // source value. Bits without a value are ignored. A group whose StartIdx
    // is greater than its EndIdx is treated as wrapping around the top of the
    // value, so both pieces are checked.
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    // First pass: mark every group that lies entirely in the low 32 result
    // bits and only uses low-32 source bits as Repl32, reducing a rotation
    // amount of 32 or more by 32 (recorded in Repl32CR).
    for (auto &BG : BitGroups) {
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
                          BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
                          " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      // Merge I into IP when both are Repl32 groups for the same value and
      // rotation and I starts right after IP ends (modulo 64).
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
                        I->V.getNode() << " RLAmt = " << I->RLAmt <<
                        " [" << I->StartIdx << ", " << I->EndIdx <<
                        "] with group with range [" <<
                        IP->StartIdx << ", " << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        // erase() yields the iterator to continue from, so I is not advanced
        // again on this path.
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          // The "next" group, wrapping around, is the first one.
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            DEBUG(dbgs() << "\tcombining bit group for " <<
                            I->V.getNode() << " RLAmt = " << I->RLAmt <<
                            " [" << I->StartIdx << ", " << I->EndIdx <<
                            "] with 32-bit replicated groups with ranges [" <<
                            IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
                            IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              // All updates through IP happen before the erase calls below.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }
1536 
1537   SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1538     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1539   }
1540 
1541   uint64_t getZerosMask() {
1542     uint64_t Mask = 0;
1543     for (unsigned i = 0; i < Bits.size(); ++i) {
1544       if (Bits[i].hasValue())
1545         continue;
1546       Mask |= (UINT64_C(1) << i);
1547     }
1548 
1549     return ~Mask;
1550   }
1551 
1552   // This method extends an input value to 64 bit if input is 32-bit integer.
1553   // While selecting instructions in BitPermutationSelector in 64-bit mode,
1554   // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1555   // In such case, we extend it to 64 bit to be consistent with other values.
1556   SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1557     if (V.getValueSizeInBits() == 64)
1558       return V;
1559 
1560     assert(V.getValueSizeInBits() == 32);
1561     SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1562     SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1563                                                    MVT::i64), 0);
1564     SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1565                                                     MVT::i64, ImDef, V,
1566                                                     SubRegIdx), 0);
1567     return ExtVal;
1568   }
1569 
1570   // Depending on the number of groups for a particular value, it might be
1571   // better to rotate, mask explicitly (using andi/andis), and then or the
1572   // result. Select this part of the result first.
1573   void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1574     if (BPermRewriterNoMasking)
1575       return;
1576 
1577     for (ValueRotInfo &VRI : ValueRotsVec) {
1578       unsigned Mask = 0;
1579       for (unsigned i = 0; i < Bits.size(); ++i) {
1580         if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1581           continue;
1582         if (RLAmt[i] != VRI.RLAmt)
1583           continue;
1584         Mask |= (1u << i);
1585       }
1586 
1587       // Compute the masks for andi/andis that would be necessary.
1588       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1589       assert((ANDIMask != 0 || ANDISMask != 0) &&
1590              "No set bits in mask for value bit groups");
1591       bool NeedsRotate = VRI.RLAmt != 0;
1592 
1593       // We're trying to minimize the number of instructions. If we have one
1594       // group, using one of andi/andis can break even.  If we have three
1595       // groups, we can use both andi and andis and break even (to use both
1596       // andi and andis we also need to or the results together). We need four
1597       // groups if we also need to rotate. To use andi/andis we need to do more
1598       // than break even because rotate-and-mask instructions tend to be easier
1599       // to schedule.
1600 
1601       // FIXME: We've biased here against using andi/andis, which is right for
1602       // POWER cores, but not optimal everywhere. For example, on the A2,
1603       // andi/andis have single-cycle latency whereas the rotate-and-mask
1604       // instructions take two cycles, and it would be better to bias toward
1605       // andi/andis in break-even cases.
1606 
1607       unsigned NumAndInsts = (unsigned) NeedsRotate +
1608                              (unsigned) (ANDIMask != 0) +
1609                              (unsigned) (ANDISMask != 0) +
1610                              (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1611                              (unsigned) (bool) Res;
1612 
1613       DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1614                       " RL: " << VRI.RLAmt << ":" <<
1615                       "\n\t\t\tisel using masking: " << NumAndInsts <<
1616                       " using rotates: " << VRI.NumGroups << "\n");
1617 
1618       if (NumAndInsts >= VRI.NumGroups)
1619         continue;
1620 
1621       DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1622 
1623       if (InstCnt) *InstCnt += NumAndInsts;
1624 
1625       SDValue VRot;
1626       if (VRI.RLAmt) {
1627         SDValue Ops[] =
1628           { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1629             getI32Imm(31, dl) };
1630         VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1631                                               Ops), 0);
1632       } else {
1633         VRot = VRI.V;
1634       }
1635 
1636       SDValue ANDIVal, ANDISVal;
1637       if (ANDIMask != 0)
1638         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1639                             VRot, getI32Imm(ANDIMask, dl)), 0);
1640       if (ANDISMask != 0)
1641         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1642                              VRot, getI32Imm(ANDISMask, dl)), 0);
1643 
1644       SDValue TotalVal;
1645       if (!ANDIVal)
1646         TotalVal = ANDISVal;
1647       else if (!ANDISVal)
1648         TotalVal = ANDIVal;
1649       else
1650         TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1651                              ANDIVal, ANDISVal), 0);
1652 
1653       if (!Res)
1654         Res = TotalVal;
1655       else
1656         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1657                         Res, TotalVal), 0);
1658 
1659       // Now, remove all groups with this underlying value and rotation
1660       // factor.
1661       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1662         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1663       });
1664     }
1665   }
1666 
1667   // Instruction selection for the 32-bit case.
1668   SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1669     SDLoc dl(N);
1670     SDValue Res;
1671 
1672     if (InstCnt) *InstCnt = 0;
1673 
1674     // Take care of cases that should use andi/andis first.
1675     SelectAndParts32(dl, Res, InstCnt);
1676 
1677     // If we've not yet selected a 'starting' instruction, and we have no zeros
1678     // to fill in, select the (Value, RLAmt) with the highest priority (largest
1679     // number of groups), and start with this rotated value.
1680     if ((!HasZeros || LateMask) && !Res) {
1681       ValueRotInfo &VRI = ValueRotsVec[0];
1682       if (VRI.RLAmt) {
1683         if (InstCnt) *InstCnt += 1;
1684         SDValue Ops[] =
1685           { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1686             getI32Imm(31, dl) };
1687         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1688                       0);
1689       } else {
1690         Res = VRI.V;
1691       }
1692 
1693       // Now, remove all groups with this underlying value and rotation factor.
1694       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1695         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1696       });
1697     }
1698 
1699     if (InstCnt) *InstCnt += BitGroups.size();
1700 
1701     // Insert the other groups (one at a time).
1702     for (auto &BG : BitGroups) {
1703       if (!Res) {
1704         SDValue Ops[] =
1705           { BG.V, getI32Imm(BG.RLAmt, dl),
1706             getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1707             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1708         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1709       } else {
1710         SDValue Ops[] =
1711           { Res, BG.V, getI32Imm(BG.RLAmt, dl),
1712               getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1713             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1714         Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1715       }
1716     }
1717 
1718     if (LateMask) {
1719       unsigned Mask = (unsigned) getZerosMask();
1720 
1721       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1722       assert((ANDIMask != 0 || ANDISMask != 0) &&
1723              "No set bits in zeros mask?");
1724 
1725       if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1726                                (unsigned) (ANDISMask != 0) +
1727                                (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1728 
1729       SDValue ANDIVal, ANDISVal;
1730       if (ANDIMask != 0)
1731         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1732                             Res, getI32Imm(ANDIMask, dl)), 0);
1733       if (ANDISMask != 0)
1734         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1735                              Res, getI32Imm(ANDISMask, dl)), 0);
1736 
1737       if (!ANDIVal)
1738         Res = ANDISVal;
1739       else if (!ANDISVal)
1740         Res = ANDIVal;
1741       else
1742         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1743                         ANDIVal, ANDISVal), 0);
1744     }
1745 
1746     return Res.getNode();
1747   }
1748 
1749   unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
1750                                 unsigned MaskStart, unsigned MaskEnd,
1751                                 bool IsIns) {
1752     // In the notation used by the instructions, 'start' and 'end' are reversed
1753     // because bits are counted from high to low order.
1754     unsigned InstMaskStart = 64 - MaskEnd - 1,
1755              InstMaskEnd   = 64 - MaskStart - 1;
1756 
1757     if (Repl32)
1758       return 1;
1759 
1760     if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
1761         InstMaskEnd == 63 - RLAmt)
1762       return 1;
1763 
1764     return 2;
1765   }
1766 
1767   // For 64-bit values, not all combinations of rotates and masks are
1768   // available. Produce one if it is available.
1769   SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
1770                           bool Repl32, unsigned MaskStart, unsigned MaskEnd,
1771                           unsigned *InstCnt = nullptr) {
1772     // In the notation used by the instructions, 'start' and 'end' are reversed
1773     // because bits are counted from high to low order.
1774     unsigned InstMaskStart = 64 - MaskEnd - 1,
1775              InstMaskEnd   = 64 - MaskStart - 1;
1776 
1777     if (InstCnt) *InstCnt += 1;
1778 
1779     if (Repl32) {
1780       // This rotation amount assumes that the lower 32 bits of the quantity
1781       // are replicated in the high 32 bits by the rotation operator (which is
1782       // done by rlwinm and friends).
1783       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1784       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1785       SDValue Ops[] =
1786         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1787           getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1788       return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1789                                             Ops), 0);
1790     }
1791 
1792     if (InstMaskEnd == 63) {
1793       SDValue Ops[] =
1794         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1795           getI32Imm(InstMaskStart, dl) };
1796       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1797     }
1798 
1799     if (InstMaskStart == 0) {
1800       SDValue Ops[] =
1801         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1802           getI32Imm(InstMaskEnd, dl) };
1803       return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1804     }
1805 
1806     if (InstMaskEnd == 63 - RLAmt) {
1807       SDValue Ops[] =
1808         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1809           getI32Imm(InstMaskStart, dl) };
1810       return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1811     }
1812 
1813     // We cannot do this with a single instruction, so we'll use two. The
1814     // problem is that we're not free to choose both a rotation amount and mask
1815     // start and end independently. We can choose an arbitrary mask start and
1816     // end, but then the rotation amount is fixed. Rotation, however, can be
1817     // inverted, and so by applying an "inverse" rotation first, we can get the
1818     // desired result.
1819     if (InstCnt) *InstCnt += 1;
1820 
1821     // The rotation mask for the second instruction must be MaskStart.
1822     unsigned RLAmt2 = MaskStart;
1823     // The first instruction must rotate V so that the overall rotation amount
1824     // is RLAmt.
1825     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1826     if (RLAmt1)
1827       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1828     return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
1829   }
1830 
1831   // For 64-bit values, not all combinations of rotates and masks are
1832   // available. Produce a rotate-mask-and-insert if one is available.
1833   SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
1834                              unsigned RLAmt, bool Repl32, unsigned MaskStart,
1835                              unsigned MaskEnd, unsigned *InstCnt = nullptr) {
1836     // In the notation used by the instructions, 'start' and 'end' are reversed
1837     // because bits are counted from high to low order.
1838     unsigned InstMaskStart = 64 - MaskEnd - 1,
1839              InstMaskEnd   = 64 - MaskStart - 1;
1840 
1841     if (InstCnt) *InstCnt += 1;
1842 
1843     if (Repl32) {
1844       // This rotation amount assumes that the lower 32 bits of the quantity
1845       // are replicated in the high 32 bits by the rotation operator (which is
1846       // done by rlwinm and friends).
1847       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1848       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1849       SDValue Ops[] =
1850         { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1851           getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1852       return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
1853                                             Ops), 0);
1854     }
1855 
1856     if (InstMaskEnd == 63 - RLAmt) {
1857       SDValue Ops[] =
1858         { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1859           getI32Imm(InstMaskStart, dl) };
1860       return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
1861     }
1862 
1863     // We cannot do this with a single instruction, so we'll use two. The
1864     // problem is that we're not free to choose both a rotation amount and mask
1865     // start and end independently. We can choose an arbitrary mask start and
1866     // end, but then the rotation amount is fixed. Rotation, however, can be
1867     // inverted, and so by applying an "inverse" rotation first, we can get the
1868     // desired result.
1869     if (InstCnt) *InstCnt += 1;
1870 
1871     // The rotation mask for the second instruction must be MaskStart.
1872     unsigned RLAmt2 = MaskStart;
1873     // The first instruction must rotate V so that the overall rotation amount
1874     // is RLAmt.
1875     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1876     if (RLAmt1)
1877       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1878     return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
1879   }
1880 
  // 64-bit counterpart of SelectAndParts32. For every (value, rotation) entry
  // in ValueRotsVec, estimate the cost of "rotate once, then AND with a mask"
  // against the cost of one rotate-and-mask/insert per matching bit group.
  // When masking wins, emit it, OR it into Res, and erase the covered groups
  // from BitGroups.
  //
  //   dl      - location attached to every node created here.
  //   Res     - in/out accumulator for the partial result (may be empty).
  //   InstCnt - optional instruction counter, incremented when non-null.
  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    // Masking can be disabled wholesale.
    if (BPermRewriterNoMasking)
      return;

    // The idea here is the same as in the 32-bit version, but with additional
    // complications from the fact that Repl32 might be true. Because we
    // aggressively convert bit groups to Repl32 form (which, for small
    // rotation factors, involves no other change), and then coalesce, it might
    // be the case that a single 64-bit masking operation could handle both
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
    // form allowed coalescing, then we must use a 32-bit rotation in order to
    // completely capture the new combined bit group.

    for (ValueRotInfo &VRI : ValueRotsVec) {
      uint64_t Mask = 0;

      // We need to add to the mask all bits from the associated bit groups.
      // If Repl32 is false, we need to add bits from bit groups that have
      // Repl32 true, but are trivially convertible to Repl32 false. Such a
      // group is trivially convertible if it overlaps only with the lower 32
      // bits, and the group has not been coalesced.
      //
      // The predicate captures VRI by copy. It is reused three times below:
      // to build the mask, to cost the rotate-based alternative, and finally
      // to erase the groups that masking has covered.
      auto MatchingBG = [VRI](const BitGroup &BG) {
        if (VRI.V != BG.V)
          return false;

        unsigned EffRLAmt = BG.RLAmt;
        if (!VRI.Repl32 && BG.Repl32) {
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
              !BG.Repl32Coalesced) {
            // Groups flagged Repl32CR are compared using their rotation
            // amount plus 32.
            if (BG.Repl32CR)
              EffRLAmt += 32;
          } else {
            return false;
          }
        } else if (VRI.Repl32 != BG.Repl32) {
          return false;
        }

        return VRI.RLAmt == EffRLAmt;
      };

      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG))
          continue;

        if (BG.StartIdx <= BG.EndIdx) {
          for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        } else {
          // The group wraps around the top of the value: cover
          // [StartIdx, size) and [0, EndIdx].
          for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
            Mask |= (UINT64_C(1) << i);
          for (unsigned i = 0; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        }
      }

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

      // Cost of masking: an optional rotate, an or to merge with an existing
      // partial result, plus either the andi/andis pair (and the or joining
      // them) or materializing the 64-bit constant followed by one and.
      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (bool) Res;
      if (Use32BitInsts)
        NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
                       (unsigned) (ANDIMask != 0 && ANDISMask != 0);
      else
        NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;

      // Cost of the rotate-based alternative: the first matching group is
      // counted as a plain rotate-and-mask, every later one as an insert.
      // MoreBG records whether any group does not match this entry.
      unsigned NumRLInsts = 0;
      bool FirstBG = true;
      bool MoreBG = false;
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG)) {
          MoreBG = true;
          continue;
        }
        NumRLInsts +=
          SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                               !FirstBG);
        FirstBG = false;
      }

      DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
                      " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
                      "\n\t\t\tisel using masking: " << NumAndInsts <<
                      " using rotates: " << NumRLInsts << "\n");

      // When we'd use andi/andis, we bias toward using the rotates (andi only
      // has a record form, and is cracked on POWER cores). However, when using
      // general 64-bit constant formation, bias toward the constant form,
      // because that exposes more opportunities for CSE.
      if (NumAndInsts > NumRLInsts)
        continue;
      // When merging multiple bit groups, instruction or is used.
      // But when rotate is used, rldimi can insert the rotated value into any
      // register, so instruction or can be avoided.
      if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
        continue;

      DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      SDValue VRot;
      // We actually need to generate a rotation if we have a non-zero rotation
      // factor or, in the Repl32 case, if we care about any of the
      // higher-order replicated bits. In the latter case, we generate a mask
      // backward so that it actually includes the entire 64 bits.
      // (This is the same condition as NeedsRotate above.)
      if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
        VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
      else
        VRot = VRI.V;

      SDValue TotalVal;
      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // Mask each 16-bit half separately, then join the halves if both are
        // needed.
        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
                                                   ExtendToInt64(VRot, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
                                                    ExtendToInt64(VRot, dl),
                                                    getI32Imm(ANDISMask, dl)),
                             0);

        if (!ANDIVal)
          TotalVal = ANDISVal;
        else if (!ANDISVal)
          TotalVal = ANDIVal;
        else
          TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                               ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the full 64-bit mask and AND with it.
        TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
        TotalVal =
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(VRot, dl), TotalVal),
                  0);
     }

      // Fold into the running result.
      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), TotalVal),
                      0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups(MatchingBG);
    }
  }
2045 
2046   // Instruction selection for the 64-bit case.
  // Emits the machine-node sequence for a 64-bit bit permutation.
  //
  //   N        - node being selected; only its location is used here.
  //   LateMask - when true, finish by AND-ing with getZerosMask().
  //   InstCnt  - optional out-parameter; reset to zero and then increased by
  //              the number of instructions accounted for.
  //
  // Returns the node producing the final value.
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts64(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!HasZeros || LateMask) && !Res) {
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
      // groups will come first, and so the VRI representing the largest number
      // of groups might not be first (it might be the first Repl32 groups).
      unsigned MaxGroupsIdx = 0;
      if (!ValueRotsVec[0].Repl32) {
        // Only the first Repl32 entry is compared against entry 0; the scan
        // stops there regardless of the outcome.
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
          if (ValueRotsVec[i].Repl32) {
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
              MaxGroupsIdx = i;
            break;
          }
      }

      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
      bool NeedsRotate = false;
      if (VRI.RLAmt) {
        NeedsRotate = true;
      } else if (VRI.Repl32) {
        for (auto &BG : BitGroups) {
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
              BG.Repl32 != VRI.Repl32)
            continue;

          // We don't need a rotate if the bit group is confined to the lower
          // 32 bits.
          // NOTE(review): this test is strict (StartIdx < EndIdx), whereas the
          // analogous test in SelectAndParts64 uses <=. A single-bit group in
          // the low word therefore requests a rotate here. Confirm whether
          // that is intended.
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
            continue;

          NeedsRotate = true;
          break;
        }
      }

      // For Repl32 the mask bounds are passed as start 31 / end 30, which is
      // the "backward" mask SelectAndParts64 describes as including the
      // entire 64 bits.
      if (NeedsRotate)
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                              InstCnt);
      else
        Res = VRI.V;

      // Now, remove all groups with this underlying value and rotation factor.
      if (Res)
        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
                 BG.Repl32 == VRI.Repl32;
        });
    }

    // Because 64-bit rotates are more flexible than inserts, we might have a
    // preference regarding which one we do first (to save one instruction).
    // Move the first group that is cheaper as a plain rotate than as an insert
    // to the front of BitGroups, so the loop below emits it as the rotate.
    if (!Res)
      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                false) <
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                true)) {
          if (I != BitGroups.begin()) {
            BitGroup BG = *I;
            BitGroups.erase(I);
            BitGroups.insert(BitGroups.begin(), BG);
          }

          // The erase/insert above invalidated I; stop iterating.
          break;
        }
      }

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      if (!Res)
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                              BG.EndIdx, InstCnt);
      else
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                                 BG.StartIdx, BG.EndIdx, InstCnt);
    }

    if (LateMask) {
      uint64_t Mask = getZerosMask();

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // One and per non-empty half, plus an or when both halves are needed.
        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                                 (unsigned) (ANDISMask != 0) +
                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);

        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
                              ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
        if (ANDISMask != 0)
          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
                               ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);

        if (!ANDIVal)
          Res = ANDISVal;
        else if (!ANDISVal)
          Res = ANDIVal;
        else
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the 64-bit mask and AND with it.
        if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;

        SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
        Res =
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(Res, dl), MaskVal), 0);
      }
    }

    return Res.getNode();
  }
2182 
2183   SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2184     // Fill in BitGroups.
2185     collectBitGroups(LateMask);
2186     if (BitGroups.empty())
2187       return nullptr;
2188 
2189     // For 64-bit values, figure out when we can use 32-bit instructions.
2190     if (Bits.size() == 64)
2191       assignRepl32BitGroups();
2192 
2193     // Fill in ValueRotsVec.
2194     collectValueRotInfo();
2195 
2196     if (Bits.size() == 32) {
2197       return Select32(N, LateMask, InstCnt);
2198     } else {
2199       assert(Bits.size() == 64 && "Not 64 bits here?");
2200       return Select64(N, LateMask, InstCnt);
2201     }
2202 
2203     return nullptr;
2204   }
2205 
2206   void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2207     BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
2208   }
2209 
  // Per-bit description of the value being selected, as produced by
  // getValueBits() in the public Select(). Entry i corresponds to result bit
  // i (bit value 1 << i).
  SmallVector<ValueBit, 64> Bits;

  // Set by computeRotationAmounts(). When false, the public Select() skips
  // the early-/late-masking comparison. Presumably true when some result
  // bits are constant zero -- confirm against computeRotationAmounts().
  bool HasZeros;
  // Rotation amount associated with each entry of Bits (same indexing);
  // filled in by computeRotationAmounts().
  SmallVector<unsigned, 64> RLAmt;

  // Bit groups still to be selected. Filled by collectBitGroups() and pruned
  // through eraseMatchingBitGroups() as groups are covered.
  SmallVector<BitGroup, 16> BitGroups;

  // Per-(value, unsigned) information; the unsigned key is presumably the
  // rotation amount -- confirm against collectValueRotInfo().
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  // ValueRotInfo entries in priority order; filled by collectValueRotInfo()
  // and walked by the SelectAndParts* and Select32/Select64 methods.
  SmallVector<ValueRotInfo, 16> ValueRotsVec;

  // DAG into which all new machine nodes and constants are created.
  SelectionDAG *CurDAG;
2221 
2222 public:
2223   BitPermutationSelector(SelectionDAG *DAG)
2224     : CurDAG(DAG) {}
2225 
  // Here we try to match complex bit permutations into a set of
  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  // known to produce optimal code for common cases (like i32 byte swapping).
2229   SDNode *Select(SDNode *N) {
2230     Memoizer.clear();
2231     auto Result =
2232         getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2233     if (!Result.first)
2234       return nullptr;
2235     Bits = std::move(*Result.second);
2236 
2237     DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2238                     " selection for:    ");
2239     DEBUG(N->dump(CurDAG));
2240 
2241     // Fill it RLAmt and set HasZeros.
2242     computeRotationAmounts();
2243 
2244     if (!HasZeros)
2245       return Select(N, false);
2246 
2247     // We currently have two techniques for handling results with zeros: early
2248     // masking (the default) and late masking. Late masking is sometimes more
2249     // efficient, but because the structure of the bit groups is different, it
2250     // is hard to tell without generating both and comparing the results. With
2251     // late masking, we ignore zeros in the resulting value when inserting each
2252     // set of bit groups, and then mask in the zeros at the end. With early
2253     // masking, we only insert the non-zero parts of the result at every step.
2254 
2255     unsigned InstCnt, InstCntLateMask;
2256     DEBUG(dbgs() << "\tEarly masking:\n");
2257     SDNode *RN = Select(N, false, &InstCnt);
2258     DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2259 
2260     DEBUG(dbgs() << "\tLate masking:\n");
2261     SDNode *RNLM = Select(N, true, &InstCntLateMask);
2262     DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
2263                     " instructions\n");
2264 
2265     if (InstCnt <= InstCntLateMask) {
2266       DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2267       return RN;
2268     }
2269 
2270     DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2271     return RNLM;
2272   }
2273 };
2274 
2275 class IntegerCompareEliminator {
2276   SelectionDAG *CurDAG;
2277   PPCDAGToDAGISel *S;
2278   // Conversion type for interpreting results of a 32-bit instruction as
2279   // a 64-bit value or vice versa.
2280   enum ExtOrTruncConversion { Ext, Trunc };
2281 
2282   // Modifiers to guide how an ISD::SETCC node's result is to be computed
2283   // in a GPR.
2284   // ZExtOrig - use the original condition code, zero-extend value
2285   // ZExtInvert - invert the condition code, zero-extend value
2286   // SExtOrig - use the original condition code, sign-extend value
2287   // SExtInvert - invert the condition code, sign-extend value
2288   enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2289 
2290   // Comparisons against zero to emit GPR code sequences for. Each of these
2291   // sequences may need to be emitted for two or more equivalent patterns.
2292   // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2293   // matters as well as the extension type: sext (-1/0), zext (1/0).
2294   // GEZExt - (zext (LHS >= 0))
2295   // GESExt - (sext (LHS >= 0))
2296   // LEZExt - (zext (LHS <= 0))
2297   // LESExt - (sext (LHS <= 0))
2298   enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2299 
2300   SDNode *tryEXTEND(SDNode *N);
2301   SDNode *tryLogicOpOfCompares(SDNode *N);
2302   SDValue computeLogicOpInGPR(SDValue LogicOp);
2303   SDValue signExtendInputIfNeeded(SDValue Input);
2304   SDValue zeroExtendInputIfNeeded(SDValue Input);
2305   SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2306   SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2307                                         ZeroCompare CmpTy);
2308   SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2309                               int64_t RHSValue, SDLoc dl);
2310  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2311                               int64_t RHSValue, SDLoc dl);
2312   SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2313                               int64_t RHSValue, SDLoc dl);
2314   SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2315                               int64_t RHSValue, SDLoc dl);
2316   SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2317 
2318 public:
2319   IntegerCompareEliminator(SelectionDAG *DAG,
2320                            PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2321     assert(CurDAG->getTargetLoweringInfo()
2322            .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2323            "Only expecting to use this on 64 bit targets.");
2324   }
2325   SDNode *Select(SDNode *N) {
2326     if (CmpInGPR == ICGPR_None)
2327       return nullptr;
2328     switch (N->getOpcode()) {
2329     default: break;
2330     case ISD::ZERO_EXTEND:
2331       if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2332           CmpInGPR == ICGPR_SextI64)
2333         return nullptr;
2334       LLVM_FALLTHROUGH;
2335     case ISD::SIGN_EXTEND:
2336       if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2337           CmpInGPR == ICGPR_ZextI64)
2338         return nullptr;
2339       return tryEXTEND(N);
2340     case ISD::AND:
2341     case ISD::OR:
2342     case ISD::XOR:
2343       return tryLogicOpOfCompares(N);
2344     }
2345     return nullptr;
2346   }
2347 };
2348 
2349 static bool isLogicOp(unsigned Opc) {
2350   return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2351 }
2352 // The obvious case for wanting to keep the value in a GPR. Namely, the
2353 // result of the comparison is actually needed in a GPR.
2354 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2355   assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2356           N->getOpcode() == ISD::SIGN_EXTEND) &&
2357          "Expecting a zero/sign extend node!");
2358   SDValue WideRes;
2359   // If we are zero-extending the result of a logical operation on i1
2360   // values, we can keep the values in GPRs.
2361   if (isLogicOp(N->getOperand(0).getOpcode()) &&
2362       N->getOperand(0).getValueType() == MVT::i1 &&
2363       N->getOpcode() == ISD::ZERO_EXTEND)
2364     WideRes = computeLogicOpInGPR(N->getOperand(0));
2365   else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2366     return nullptr;
2367   else
2368     WideRes =
2369       getSETCCInGPR(N->getOperand(0),
2370                     N->getOpcode() == ISD::SIGN_EXTEND ?
2371                     SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2372 
2373   if (!WideRes)
2374     return nullptr;
2375 
2376   SDLoc dl(N);
2377   bool Input32Bit = WideRes.getValueType() == MVT::i32;
2378   bool Output32Bit = N->getValueType(0) == MVT::i32;
2379 
2380   NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2381   NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2382 
2383   SDValue ConvOp = WideRes;
2384   if (Input32Bit != Output32Bit)
2385     ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2386                            ExtOrTruncConversion::Trunc);
2387   return ConvOp.getNode();
2388 }
2389 
2390 // Attempt to perform logical operations on the results of comparisons while
2391 // keeping the values in GPRs. Without doing so, these would end up being
2392 // lowered to CR-logical operations which suffer from significant latency and
2393 // low ILP.
2394 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2395   if (N->getValueType(0) != MVT::i1)
2396     return nullptr;
2397   assert(isLogicOp(N->getOpcode()) &&
2398          "Expected a logic operation on setcc results.");
2399   SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2400   if (!LoweredLogical)
2401     return nullptr;
2402 
2403   SDLoc dl(N);
2404   bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
2405   unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2406   SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2407   SDValue LHS = LoweredLogical.getOperand(0);
2408   SDValue RHS = LoweredLogical.getOperand(1);
2409   SDValue WideOp;
2410   SDValue OpToConvToRecForm;
2411 
2412   // Look through any 32-bit to 64-bit implicit extend nodes to find the
2413   // opcode that is input to the XORI.
2414   if (IsBitwiseNegate &&
2415       LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
2416     OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
2417   else if (IsBitwiseNegate)
2418     // If the input to the XORI isn't an extension, that's what we're after.
2419     OpToConvToRecForm = LoweredLogical.getOperand(0);
2420   else
2421     // If this is not an XORI, it is a reg-reg logical op and we can convert
2422     // it to record-form.
2423     OpToConvToRecForm = LoweredLogical;
2424 
2425   // Get the record-form version of the node we're looking to use to get the
2426   // CR result from.
2427   uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
2428   int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2429 
2430   // Convert the right node to record-form. This is either the logical we're
2431   // looking at or it is the input node to the negation (if we're looking at
2432   // a bitwise negation).
2433   if (NewOpc != -1 && IsBitwiseNegate) {
2434     // The input to the XORI has a record-form. Use it.
2435     assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
2436            "Expected a PPC::XORI8 only for bitwise negation.");
2437     // Emit the record-form instruction.
2438     std::vector<SDValue> Ops;
2439     for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
2440       Ops.push_back(OpToConvToRecForm.getOperand(i));
2441 
2442     WideOp =
2443       SDValue(CurDAG->getMachineNode(NewOpc, dl,
2444                                      OpToConvToRecForm.getValueType(),
2445                                      MVT::Glue, Ops), 0);
2446   } else {
2447     assert((NewOpc != -1 || !IsBitwiseNegate) &&
2448            "No record form available for AND8/OR8/XOR8?");
2449     WideOp =
2450       SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
2451                                      MVT::i64, MVT::Glue, LHS, RHS), 0);
2452   }
2453 
2454   // Select this node to a single bit from CR0 set by the record-form node
2455   // just created. For bitwise negation, use the EQ bit which is the equivalent
2456   // of negating the result (i.e. it is a bit set when the result of the
2457   // operation is zero).
2458   SDValue SRIdxVal =
2459     CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
2460   SDValue CRBit =
2461     SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2462                                    MVT::i1, CR0Reg, SRIdxVal,
2463                                    WideOp.getValue(1)), 0);
2464   return CRBit.getNode();
2465 }
2466 
2467 // Lower a logical operation on i1 values into a GPR sequence if possible.
2468 // The result can be kept in a GPR if requested.
2469 // Three types of inputs can be handled:
2470 // - SETCC
2471 // - TRUNCATE
2472 // - Logical operation (AND/OR/XOR)
2473 // There is also a special case that is handled (namely a complement operation
2474 // achieved with xor %a, -1).
2475 SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
2476   assert(isLogicOp(LogicOp.getOpcode()) &&
2477         "Can only handle logic operations here.");
2478   assert(LogicOp.getValueType() == MVT::i1 &&
2479          "Can only handle logic operations on i1 values here.");
2480   SDLoc dl(LogicOp);
2481   SDValue LHS, RHS;
2482 
2483  // Special case: xor %a, -1
2484   bool IsBitwiseNegation = isBitwiseNot(LogicOp);
2485 
2486   // Produces a GPR sequence for each operand of the binary logic operation.
2487   // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2488   // the value in a GPR and for logic operations, it will recursively produce
2489   // a GPR sequence for the operation.
2490  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
2491     unsigned OperandOpcode = Operand.getOpcode();
2492     if (OperandOpcode == ISD::SETCC)
2493       return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2494     else if (OperandOpcode == ISD::TRUNCATE) {
2495       SDValue InputOp = Operand.getOperand(0);
2496      EVT InVT = InputOp.getValueType();
2497       return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
2498                                             PPC::RLDICL, dl, InVT, InputOp,
2499                                             S->getI64Imm(0, dl),
2500                                             S->getI64Imm(63, dl)), 0);
2501     } else if (isLogicOp(OperandOpcode))
2502       return computeLogicOpInGPR(Operand);
2503     return SDValue();
2504   };
2505   LHS = getLogicOperand(LogicOp.getOperand(0));
2506   RHS = getLogicOperand(LogicOp.getOperand(1));
2507 
2508   // If a GPR sequence can't be produced for the LHS we can't proceed.
2509   // Not producing a GPR sequence for the RHS is only a problem if this isn't
2510   // a bitwise negation operation.
2511   if (!LHS || (!RHS && !IsBitwiseNegation))
2512     return SDValue();
2513 
2514   NumLogicOpsOnComparison++;
2515 
2516   // We will use the inputs as 64-bit values.
2517   if (LHS.getValueType() == MVT::i32)
2518     LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
2519   if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
2520     RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
2521 
2522   unsigned NewOpc;
2523   switch (LogicOp.getOpcode()) {
2524   default: llvm_unreachable("Unknown logic operation.");
2525   case ISD::AND: NewOpc = PPC::AND8; break;
2526   case ISD::OR:  NewOpc = PPC::OR8;  break;
2527   case ISD::XOR: NewOpc = PPC::XOR8; break;
2528   }
2529 
2530   if (IsBitwiseNegation) {
2531     RHS = S->getI64Imm(1, dl);
2532     NewOpc = PPC::XORI8;
2533   }
2534 
2535   return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
2536 
2537 }
2538 
2539 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2540 /// Otherwise just reinterpret it as a 64-bit value.
2541 /// Useful when emitting comparison code for 32-bit values without using
2542 /// the compare instruction (which only considers the lower 32-bits).
2543 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2544   assert(Input.getValueType() == MVT::i32 &&
2545          "Can only sign-extend 32-bit values here.");
2546   unsigned Opc = Input.getOpcode();
2547 
2548   // The value was sign extended and then truncated to 32-bits. No need to
2549   // sign extend it again.
2550   if (Opc == ISD::TRUNCATE &&
2551       (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2552        Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2553     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2554 
2555   LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2556   // The input is a sign-extending load. All ppc sign-extending loads
2557   // sign-extend to the full 64-bits.
2558   if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
2559     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2560 
2561   ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2562   // We don't sign-extend constants.
2563   if (InputConst)
2564     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2565 
2566   SDLoc dl(Input);
2567   SignExtensionsAdded++;
2568   return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
2569                                         MVT::i64, Input), 0);
2570 }
2571 
2572 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2573 /// Otherwise just reinterpret it as a 64-bit value.
2574 /// Useful when emitting comparison code for 32-bit values without using
2575 /// the compare instruction (which only considers the lower 32-bits).
2576 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
2577   assert(Input.getValueType() == MVT::i32 &&
2578          "Can only zero-extend 32-bit values here.");
2579   unsigned Opc = Input.getOpcode();
2580 
2581   // The only condition under which we can omit the actual extend instruction:
2582   // - The value is a positive constant
2583   // - The value comes from a load that isn't a sign-extending load
2584   // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2585   bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
2586     (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
2587      Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
2588   if (IsTruncateOfZExt)
2589     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2590 
2591   ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2592   if (InputConst && InputConst->getSExtValue() >= 0)
2593     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2594 
2595   LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2596   // The input is a load that doesn't sign-extend (it will be zero-extended).
2597   if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
2598     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2599 
2600   // None of the above, need to zero-extend.
2601   SDLoc dl(Input);
2602   ZeroExtensionsAdded++;
2603   return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
2604                                         S->getI64Imm(0, dl),
2605                                         S->getI64Imm(32, dl)), 0);
2606 }
2607 
2608 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2609 // course not actual zero/sign extensions that will generate machine code,
2610 // they're just a way to reinterpret a 32 bit value in a register as a
2611 // 64 bit value and vice-versa.
2612 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
2613                                                 ExtOrTruncConversion Conv) {
2614   SDLoc dl(NatWidthRes);
2615 
2616   // For reinterpreting 32-bit values as 64 bit values, we generate
2617   // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2618   if (Conv == ExtOrTruncConversion::Ext) {
2619     SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
2620     SDValue SubRegIdx =
2621       CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2622     return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
2623                                           ImDef, NatWidthRes, SubRegIdx), 0);
2624   }
2625 
2626   assert(Conv == ExtOrTruncConversion::Trunc &&
2627          "Unknown convertion between 32 and 64 bit values.");
2628   // For reinterpreting 64-bit values as 32-bit values, we just need to
2629   // EXTRACT_SUBREG (i.e. extract the low word).
2630   SDValue SubRegIdx =
2631     CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2632   return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
2633                                         NatWidthRes, SubRegIdx), 0);
2634 }
2635 
2636 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2637 // Handle both zero-extensions and sign-extensions.
2638 SDValue
2639 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2640                                                          ZeroCompare CmpTy) {
2641   EVT InVT = LHS.getValueType();
2642   bool Is32Bit = InVT == MVT::i32;
2643   SDValue ToExtend;
2644 
2645   // Produce the value that needs to be either zero or sign extended.
2646   switch (CmpTy) {
2647   case ZeroCompare::GEZExt:
2648   case ZeroCompare::GESExt:
2649     ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
2650                                               dl, InVT, LHS, LHS), 0);
2651     break;
2652   case ZeroCompare::LEZExt:
2653   case ZeroCompare::LESExt: {
2654     if (Is32Bit) {
2655       // Upper 32 bits cannot be undefined for this sequence.
2656       LHS = signExtendInputIfNeeded(LHS);
2657       SDValue Neg =
2658         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2659       ToExtend =
2660         SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2661                                        Neg, S->getI64Imm(1, dl),
2662                                        S->getI64Imm(63, dl)), 0);
2663     } else {
2664       SDValue Addi =
2665         SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
2666                                        S->getI64Imm(~0ULL, dl)), 0);
2667       ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2668                                                 Addi, LHS), 0);
2669     }
2670     break;
2671   }
2672   }
2673 
2674   // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2675   if (!Is32Bit &&
2676       (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
2677     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2678                                           ToExtend, S->getI64Imm(1, dl),
2679                                           S->getI64Imm(63, dl)), 0);
2680   if (!Is32Bit &&
2681       (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
2682     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
2683                                           S->getI64Imm(63, dl)), 0);
2684 
2685   assert(Is32Bit && "Should have handled the 32-bit sequences above.");
2686   // For 32-bit sequences, the extensions differ between GE/LE cases.
2687   switch (CmpTy) {
2688   case ZeroCompare::GEZExt: {
2689     SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2690                            S->getI32Imm(31, dl) };
2691     return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2692                                           ShiftOps), 0);
2693   }
2694   case ZeroCompare::GESExt:
2695     return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
2696                                           S->getI32Imm(31, dl)), 0);
2697   case ZeroCompare::LEZExt:
2698     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
2699                                           S->getI32Imm(1, dl)), 0);
2700   case ZeroCompare::LESExt:
2701     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
2702                                           S->getI32Imm(-1, dl)), 0);
2703   }
2704 
2705   // The above case covers all the enumerators so it can't have a default clause
2706   // to avoid compiler warnings.
2707   llvm_unreachable("Unknown zero-comparison type.");
2708 }
2709 
2710 /// Produces a zero-extended result of comparing two 32-bit values according to
2711 /// the passed condition code.
2712 SDValue
2713 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
2714                                               ISD::CondCode CC,
2715                                               int64_t RHSValue, SDLoc dl) {
2716   if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2717       CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
2718     return SDValue();
2719   bool IsRHSZero = RHSValue == 0;
2720   bool IsRHSOne = RHSValue == 1;
2721   bool IsRHSNegOne = RHSValue == -1LL;
2722   switch (CC) {
2723   default: return SDValue();
2724   case ISD::SETEQ: {
2725     // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2726     // (zext (setcc %a, 0, seteq))  -> (lshr (cntlzw %a), 5)
2727     SDValue Xor = IsRHSZero ? LHS :
2728       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2729     SDValue Clz =
2730       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2731     SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2732       S->getI32Imm(31, dl) };
2733     return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2734                                           ShiftOps), 0);
2735   }
2736   case ISD::SETNE: {
2737     // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2738     // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
2739     SDValue Xor = IsRHSZero ? LHS :
2740       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2741     SDValue Clz =
2742       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2743     SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2744       S->getI32Imm(31, dl) };
2745     SDValue Shift =
2746       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2747     return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2748                                           S->getI32Imm(1, dl)), 0);
2749   }
2750   case ISD::SETGE: {
2751     // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2752     // (zext (setcc %a, 0, setge))  -> (lshr (~ %a), 31)
2753     if(IsRHSZero)
2754       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2755 
2756     // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2757     // by swapping inputs and falling through.
2758     std::swap(LHS, RHS);
2759     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2760     IsRHSZero = RHSConst && RHSConst->isNullValue();
2761     LLVM_FALLTHROUGH;
2762   }
2763   case ISD::SETLE: {
2764     if (CmpInGPR == ICGPR_NonExtIn)
2765       return SDValue();
2766     // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2767     // (zext (setcc %a, 0, setle))  -> (xor (lshr (- %a), 63), 1)
2768     if(IsRHSZero) {
2769       if (CmpInGPR == ICGPR_NonExtIn)
2770         return SDValue();
2771       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2772     }
2773 
2774     // The upper 32-bits of the register can't be undefined for this sequence.
2775     LHS = signExtendInputIfNeeded(LHS);
2776     RHS = signExtendInputIfNeeded(RHS);
2777     SDValue Sub =
2778       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2779     SDValue Shift =
2780       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
2781                                      S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
2782               0);
2783     return
2784       SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
2785                                      MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
2786   }
2787   case ISD::SETGT: {
2788     // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2789     // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2790     // (zext (setcc %a, 0, setgt))  -> (lshr (- %a), 63)
2791     // Handle SETLT -1 (which is equivalent to SETGE 0).
2792     if (IsRHSNegOne)
2793       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2794 
2795     if (IsRHSZero) {
2796       if (CmpInGPR == ICGPR_NonExtIn)
2797         return SDValue();
2798       // The upper 32-bits of the register can't be undefined for this sequence.
2799       LHS = signExtendInputIfNeeded(LHS);
2800       RHS = signExtendInputIfNeeded(RHS);
2801       SDValue Neg =
2802         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2803       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2804                      Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
2805     }
2806     // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2807     // (%b < %a) by swapping inputs and falling through.
2808     std::swap(LHS, RHS);
2809     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2810     IsRHSZero = RHSConst && RHSConst->isNullValue();
2811     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2812     LLVM_FALLTHROUGH;
2813   }
2814   case ISD::SETLT: {
2815     // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2816     // (zext (setcc %a, 1, setlt))  -> (xor (lshr (- %a), 63), 1)
2817     // (zext (setcc %a, 0, setlt))  -> (lshr %a, 31)
2818     // Handle SETLT 1 (which is equivalent to SETLE 0).
2819     if (IsRHSOne) {
2820       if (CmpInGPR == ICGPR_NonExtIn)
2821         return SDValue();
2822       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2823     }
2824 
2825     if (IsRHSZero) {
2826       SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2827                              S->getI32Imm(31, dl) };
2828       return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2829                                             ShiftOps), 0);
2830     }
2831 
2832     if (CmpInGPR == ICGPR_NonExtIn)
2833       return SDValue();
2834     // The upper 32-bits of the register can't be undefined for this sequence.
2835     LHS = signExtendInputIfNeeded(LHS);
2836     RHS = signExtendInputIfNeeded(RHS);
2837     SDValue SUBFNode =
2838       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2839     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2840                                     SUBFNode, S->getI64Imm(1, dl),
2841                                     S->getI64Imm(63, dl)), 0);
2842   }
2843   case ISD::SETUGE:
2844     // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
2845     // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
2846     std::swap(LHS, RHS);
2847     LLVM_FALLTHROUGH;
2848   case ISD::SETULE: {
2849     if (CmpInGPR == ICGPR_NonExtIn)
2850       return SDValue();
2851     // The upper 32-bits of the register can't be undefined for this sequence.
2852     LHS = zeroExtendInputIfNeeded(LHS);
2853     RHS = zeroExtendInputIfNeeded(RHS);
2854     SDValue Subtract =
2855       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2856     SDValue SrdiNode =
2857       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2858                                           Subtract, S->getI64Imm(1, dl),
2859                                           S->getI64Imm(63, dl)), 0);
2860     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
2861                                             S->getI32Imm(1, dl)), 0);
2862   }
2863   case ISD::SETUGT:
2864     // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
2865     // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
2866     std::swap(LHS, RHS);
2867     LLVM_FALLTHROUGH;
2868   case ISD::SETULT: {
2869     if (CmpInGPR == ICGPR_NonExtIn)
2870       return SDValue();
2871     // The upper 32-bits of the register can't be undefined for this sequence.
2872     LHS = zeroExtendInputIfNeeded(LHS);
2873     RHS = zeroExtendInputIfNeeded(RHS);
2874     SDValue Subtract =
2875       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2876     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2877                                           Subtract, S->getI64Imm(1, dl),
2878                                           S->getI64Imm(63, dl)), 0);
2879   }
2880   }
2881 }
2882 
2883 /// Produces a sign-extended result of comparing two 32-bit values according to
2884 /// the passed condition code.
2885 SDValue
2886 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
2887                                               ISD::CondCode CC,
2888                                               int64_t RHSValue, SDLoc dl) {
2889   if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2890       CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
2891     return SDValue();
2892   bool IsRHSZero = RHSValue == 0;
2893   bool IsRHSOne = RHSValue == 1;
2894   bool IsRHSNegOne = RHSValue == -1LL;
2895 
2896   switch (CC) {
2897   default: return SDValue();
2898   case ISD::SETEQ: {
2899     // (sext (setcc %a, %b, seteq)) ->
2900     //   (ashr (shl (ctlz (xor %a, %b)), 58), 63)
2901     // (sext (setcc %a, 0, seteq)) ->
2902     //   (ashr (shl (ctlz %a), 58), 63)
2903     SDValue CountInput = IsRHSZero ? LHS :
2904       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2905     SDValue Cntlzw =
2906       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
2907     SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
2908                          S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
2909     SDValue Slwi =
2910       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
2911     return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
2912   }
2913   case ISD::SETNE: {
2914     // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
2915     // flip the bit, finally take 2's complement.
2916     // (sext (setcc %a, %b, setne)) ->
2917     //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
2918     // Same as above, but the first xor is not needed.
2919     // (sext (setcc %a, 0, setne)) ->
2920     //   (neg (xor (lshr (ctlz %a), 5), 1))
2921     SDValue Xor = IsRHSZero ? LHS :
2922       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2923     SDValue Clz =
2924       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2925     SDValue ShiftOps[] =
2926       { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
2927     SDValue Shift =
2928       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2929     SDValue Xori =
2930       SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2931                                      S->getI32Imm(1, dl)), 0);
2932     return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
2933   }
2934   case ISD::SETGE: {
2935     // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
2936     // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
2937     if (IsRHSZero)
2938       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
2939 
2940     // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2941     // by swapping inputs and falling through.
2942     std::swap(LHS, RHS);
2943     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2944     IsRHSZero = RHSConst && RHSConst->isNullValue();
2945     LLVM_FALLTHROUGH;
2946   }
2947   case ISD::SETLE: {
2948     if (CmpInGPR == ICGPR_NonExtIn)
2949       return SDValue();
2950     // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
2951     // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
2952     if (IsRHSZero)
2953       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
2954 
2955     // The upper 32-bits of the register can't be undefined for this sequence.
2956     LHS = signExtendInputIfNeeded(LHS);
2957     RHS = signExtendInputIfNeeded(RHS);
2958     SDValue SUBFNode =
2959       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
2960                                      LHS, RHS), 0);
2961     SDValue Srdi =
2962       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2963                                      SUBFNode, S->getI64Imm(1, dl),
2964                                      S->getI64Imm(63, dl)), 0);
2965     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
2966                                           S->getI32Imm(-1, dl)), 0);
2967   }
2968   case ISD::SETGT: {
2969     // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
2970     // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
2971     // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
2972     if (IsRHSNegOne)
2973       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
2974     if (IsRHSZero) {
2975       if (CmpInGPR == ICGPR_NonExtIn)
2976         return SDValue();
2977       // The upper 32-bits of the register can't be undefined for this sequence.
2978       LHS = signExtendInputIfNeeded(LHS);
2979       RHS = signExtendInputIfNeeded(RHS);
2980       SDValue Neg =
2981         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2982         return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
2983                                               S->getI64Imm(63, dl)), 0);
2984     }
2985     // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2986     // (%b < %a) by swapping inputs and falling through.
2987     std::swap(LHS, RHS);
2988     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2989     IsRHSZero = RHSConst && RHSConst->isNullValue();
2990     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2991     LLVM_FALLTHROUGH;
2992   }
2993   case ISD::SETLT: {
2994     // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
2995     // (sext (setcc %a, 1, setgt))  -> (add (lshr (- %a), 63), -1)
2996     // (sext (setcc %a, 0, setgt))  -> (ashr %a, 31)
2997     if (IsRHSOne) {
2998       if (CmpInGPR == ICGPR_NonExtIn)
2999         return SDValue();
3000       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3001     }
3002     if (IsRHSZero)
3003       return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3004                                             S->getI32Imm(31, dl)), 0);
3005 
3006     if (CmpInGPR == ICGPR_NonExtIn)
3007       return SDValue();
3008     // The upper 32-bits of the register can't be undefined for this sequence.
3009     LHS = signExtendInputIfNeeded(LHS);
3010     RHS = signExtendInputIfNeeded(RHS);
3011     SDValue SUBFNode =
3012       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3013     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3014                                           SUBFNode, S->getI64Imm(63, dl)), 0);
3015   }
3016   case ISD::SETUGE:
3017     // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3018     // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3019     std::swap(LHS, RHS);
3020     LLVM_FALLTHROUGH;
3021   case ISD::SETULE: {
3022     if (CmpInGPR == ICGPR_NonExtIn)
3023       return SDValue();
3024     // The upper 32-bits of the register can't be undefined for this sequence.
3025     LHS = zeroExtendInputIfNeeded(LHS);
3026     RHS = zeroExtendInputIfNeeded(RHS);
3027     SDValue Subtract =
3028       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3029     SDValue Shift =
3030       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3031                                      S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3032               0);
3033     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3034                                           S->getI32Imm(-1, dl)), 0);
3035   }
3036   case ISD::SETUGT:
3037     // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3038     // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3039     std::swap(LHS, RHS);
3040     LLVM_FALLTHROUGH;
3041   case ISD::SETULT: {
3042     if (CmpInGPR == ICGPR_NonExtIn)
3043       return SDValue();
3044     // The upper 32-bits of the register can't be undefined for this sequence.
3045     LHS = zeroExtendInputIfNeeded(LHS);
3046     RHS = zeroExtendInputIfNeeded(RHS);
3047     SDValue Subtract =
3048       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3049     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3050                                           Subtract, S->getI64Imm(63, dl)), 0);
3051   }
3052   }
3053 }
3054 
/// Produces a zero-extended result of comparing two 64-bit values according to
/// the passed condition code.
///
/// \param LHS      Left-hand operand of the comparison.
/// \param RHS      Right-hand operand of the comparison.
/// \param CC       Condition code to evaluate; codes without a case below
///                 yield an empty SDValue.
/// \param RHSValue Value used to detect the special cases RHS == 0, RHS == 1
///                 and RHS == -1.
/// \param dl       Debug location attached to every node created here.
/// \returns Result 0 of the last machine node of the emitted sequence, or an
///          empty SDValue when CmpInGPR is one of the settings rejected below
///          or the condition code is not handled.
SDValue
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // No sequence is produced under any of these CmpInGPR settings.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
    // (zext (setcc %a, 0, seteq)) ->  (lshr (ctlz %a), 6)
    // When comparing against zero the XOR is unnecessary: %a is used as is.
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
    // RLDICL with rotate 58 / mask-begin 63 implements the (lshr ..., 6) above.
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
                                          S->getI64Imm(58, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETNE: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, (xor %a, %b), addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, %a, addcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    // Result 0 of ADDIC8 is the sum; result 1 (glue) carries the CA bit to the
    // SUBFE8 below.
    SDValue AC =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
                                          Xor, AC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setge)) ->
    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    // Handle (%a >= %b) as (%b <= %a): swap the inputs and fall through. The
    // zero flag described the old RHS, so recompute it for the new one.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setle)) ->
    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
    // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    // (lshr LHS, 63): rotate left by one and keep only the lowest bit.
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // (ashr RHS, 63)
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    // Only the carry (result 1, glue) of the subtraction is consumed.
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                          ShiftR, ShiftL, SubtractCarry), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setgt)) ->
    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
    // (%a > -1) is the same test as (%a >= 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    if (IsRHSZero) {
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a): swap the inputs and fall through. Both
    // flags tested by the SETLT case are recomputed for the new RHS.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setlt)) ->
    //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
    // (%a < 1) is the same test as (%a <= 0).
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Only the carry (result 1, glue) of the subtraction is consumed.
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    // The ADDE8 sequence is the one used for the >= form (see the SETLE case
    // with the operands exchanged); XOR with 1 inverts it to obtain <.
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                          ADDE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
    // Handled as (%b <=u %a) by swapping the inputs and falling through.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue SUBFE8Node =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
                                     LHS, LHS, SUBFC8Carry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
                                          SUBFE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
    // Handled as (%b <u %a) by swapping the inputs and falling through.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                     LHS, LHS, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          ExtSub), 0);
  }
  }
}
3211 
/// Produces a sign-extended result of comparing two 64-bit values according to
/// the passed condition code.
///
/// \param LHS      Left-hand operand of the comparison.
/// \param RHS      Right-hand operand of the comparison.
/// \param CC       Condition code to evaluate; codes without a case below
///                 yield an empty SDValue.
/// \param RHSValue Value used to detect the special cases RHS == 0, RHS == 1
///                 and RHS == -1.
/// \param dl       Debug location attached to every node created here.
/// \returns Result 0 of the last machine node of the emitted sequence, or an
///          empty SDValue when CmpInGPR is one of the settings rejected below
///          or the condition code is not handled.
SDValue
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // No sequence is produced under any of these CmpInGPR settings.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    // When comparing against zero the XOR is unnecessary: %a is used as is.
    SDValue AddInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    // Result 0 of ADDIC8 is the sum; result 1 (glue) carries the CA bit to the
    // SUBFE8 below.
    SDValue Addic =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     AddInput, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
                                          Addic, Addic.getValue(1)), 0);
  }
  case ISD::SETNE: {
    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    // Result 0 of SUBFIC8 is the difference; result 1 (glue) carries the CA
    // bit to the SUBFE8 below.
    SDValue SC =
      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(0, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
                                          SC, SC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setge)) ->
    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
    // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    // Handle (%a >= %b) as (%b <= %a): swap the inputs and fall through. The
    // zero flag described the old RHS, so recompute it for the new one.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setle)) ->
    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
    // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    // (ashr RHS, 63)
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    // (lshr LHS, 63): rotate left by one and keep only the lowest bit.
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Only the carry (result 1, glue) of the subtraction is consumed.
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue Adde =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     ShiftR, ShiftL, SubtractCarry), 0);
    // Negating the ADDE8 result gives the sign-extended form.
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setgt)) ->
    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
    // (%a > -1) is the same test as (%a >= 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      SDValue Add =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(-1, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a): swap the inputs and fall through. Both
    // flags tested by the SETLT case are recomputed for the new RHS.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setlt)) ->
    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
    // (%a < 1) is the same test as (%a <= 0).
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    if (IsRHSZero) {
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
                                            S->getI64Imm(63, dl)), 0);
    }
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Only the carry (result 1, glue) of the subtraction is consumed.
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    // XOR with 1 inverts the >= form into <; the NEG8 then sign-extends it.
    SDValue XORI8Node =
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          XORI8Node), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
    // Handled as (%b <=u %a) by swapping the inputs and falling through.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
                                     LHS, SubtractCarry), 0);
    // NOR of a value with itself is its bitwise complement.
    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
                                          ExtSub, ExtSub), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
    // Handled as (%b <u %a) by swapping the inputs and falling through.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
    SDValue SubCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                     LHS, LHS, SubCarry), 0);
  }
  }
}
3371 
3372 /// Do all uses of this SDValue need the result in a GPR?
3373 /// This is meant to be used on values that have type i1 since
3374 /// it is somewhat meaningless to ask if values of other types
3375 /// should be kept in GPR's.
3376 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3377   assert(Compare.getOpcode() == ISD::SETCC &&
3378          "An ISD::SETCC node required here.");
3379 
3380   // For values that have a single use, the caller should obviously already have
3381   // checked if that use is an extending use. We check the other uses here.
3382   if (Compare.hasOneUse())
3383     return true;
3384   // We want the value in a GPR if it is being extended, used for a select, or
3385   // used in logical operations.
3386   for (auto CompareUse : Compare.getNode()->uses())
3387     if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3388         CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3389         CompareUse->getOpcode() != ISD::SELECT &&
3390         !isLogicOp(CompareUse->getOpcode())) {
3391       OmittedForNonExtendUses++;
3392       return false;
3393     }
3394   return true;
3395 }
3396 
3397 /// Returns an equivalent of a SETCC node but with the result the same width as
3398 /// the inputs. This can nalso be used for SELECT_CC if either the true or false
3399 /// values is a power of two while the other is zero.
3400 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3401                                                 SetccInGPROpts ConvOpts) {
3402   assert((Compare.getOpcode() == ISD::SETCC ||
3403           Compare.getOpcode() == ISD::SELECT_CC) &&
3404          "An ISD::SETCC node required here.");
3405 
3406   // Don't convert this comparison to a GPR sequence because there are uses
3407   // of the i1 result (i.e. uses that require the result in the CR).
3408   if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3409     return SDValue();
3410 
3411   SDValue LHS = Compare.getOperand(0);
3412   SDValue RHS = Compare.getOperand(1);
3413 
3414   // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3415   int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3416   ISD::CondCode CC =
3417     cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3418   EVT InputVT = LHS.getValueType();
3419   if (InputVT != MVT::i32 && InputVT != MVT::i64)
3420     return SDValue();
3421 
3422   if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3423       ConvOpts == SetccInGPROpts::SExtInvert)
3424     CC = ISD::getSetCCInverse(CC, true);
3425 
3426   bool Inputs32Bit = InputVT == MVT::i32;
3427 
3428   SDLoc dl(Compare);
3429   ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3430   int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3431   bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3432     ConvOpts == SetccInGPROpts::SExtInvert;
3433 
3434   if (IsSext && Inputs32Bit)
3435     return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3436   else if (Inputs32Bit)
3437     return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3438   else if (IsSext)
3439     return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3440   return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3441 }
3442 
3443 } // end anonymous namespace
3444 
3445 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3446   if (N->getValueType(0) != MVT::i32 &&
3447       N->getValueType(0) != MVT::i64)
3448     return false;
3449 
3450   // This optimization will emit code that assumes 64-bit registers
3451   // so we don't want to run it in 32-bit mode. Also don't run it
3452   // on functions that are not to be optimized.
3453   if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3454     return false;
3455 
3456   switch (N->getOpcode()) {
3457   default: break;
3458   case ISD::ZERO_EXTEND:
3459   case ISD::SIGN_EXTEND:
3460   case ISD::AND:
3461   case ISD::OR:
3462   case ISD::XOR: {
3463     IntegerCompareEliminator ICmpElim(CurDAG, this);
3464     if (SDNode *New = ICmpElim.Select(N)) {
3465       ReplaceNode(N, New);
3466       return true;
3467     }
3468   }
3469   }
3470   return false;
3471 }
3472 
3473 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3474   if (N->getValueType(0) != MVT::i32 &&
3475       N->getValueType(0) != MVT::i64)
3476     return false;
3477 
3478   if (!UseBitPermRewriter)
3479     return false;
3480 
3481   switch (N->getOpcode()) {
3482   default: break;
3483   case ISD::ROTL:
3484   case ISD::SHL:
3485   case ISD::SRL:
3486   case ISD::AND:
3487   case ISD::OR: {
3488     BitPermutationSelector BPS(CurDAG);
3489     if (SDNode *New = BPS.Select(N)) {
3490       ReplaceNode(N, New);
3491       return true;
3492     }
3493     return false;
3494   }
3495   }
3496 
3497   return false;
3498 }
3499 
3500 /// SelectCC - Select a comparison of the specified values with the specified
3501 /// condition code, returning the CR# of the expression.
3502 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3503                                   const SDLoc &dl) {
3504   // Always select the LHS.
3505   unsigned Opc;
3506 
3507   if (LHS.getValueType() == MVT::i32) {
3508     unsigned Imm;
3509     if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3510       if (isInt32Immediate(RHS, Imm)) {
3511         // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3512         if (isUInt<16>(Imm))
3513           return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3514                                                 getI32Imm(Imm & 0xFFFF, dl)),
3515                          0);
3516         // If this is a 16-bit signed immediate, fold it.
3517         if (isInt<16>((int)Imm))
3518           return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3519                                                 getI32Imm(Imm & 0xFFFF, dl)),
3520                          0);
3521 
3522         // For non-equality comparisons, the default code would materialize the
3523         // constant, then compare against it, like this:
3524         //   lis r2, 4660
3525         //   ori r2, r2, 22136
3526         //   cmpw cr0, r3, r2
3527         // Since we are just comparing for equality, we can emit this instead:
3528         //   xoris r0,r3,0x1234
3529         //   cmplwi cr0,r0,0x5678
3530         //   beq cr0,L6
3531         SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3532                                            getI32Imm(Imm >> 16, dl)), 0);
3533         return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3534                                               getI32Imm(Imm & 0xFFFF, dl)), 0);
3535       }
3536       Opc = PPC::CMPLW;
3537     } else if (ISD::isUnsignedIntSetCC(CC)) {
3538       if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
3539         return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3540                                               getI32Imm(Imm & 0xFFFF, dl)), 0);
3541       Opc = PPC::CMPLW;
3542     } else {
3543       int16_t SImm;
3544       if (isIntS16Immediate(RHS, SImm))
3545         return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3546                                               getI32Imm((int)SImm & 0xFFFF,
3547                                                         dl)),
3548                          0);
3549       Opc = PPC::CMPW;
3550     }
3551   } else if (LHS.getValueType() == MVT::i64) {
3552     uint64_t Imm;
3553     if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3554       if (isInt64Immediate(RHS.getNode(), Imm)) {
3555         // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3556         if (isUInt<16>(Imm))
3557           return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3558                                                 getI32Imm(Imm & 0xFFFF, dl)),
3559                          0);
3560         // If this is a 16-bit signed immediate, fold it.
3561         if (isInt<16>(Imm))
3562           return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3563                                                 getI32Imm(Imm & 0xFFFF, dl)),
3564                          0);
3565 
3566         // For non-equality comparisons, the default code would materialize the
3567         // constant, then compare against it, like this:
3568         //   lis r2, 4660
3569         //   ori r2, r2, 22136
3570         //   cmpd cr0, r3, r2
3571         // Since we are just comparing for equality, we can emit this instead:
3572         //   xoris r0,r3,0x1234
3573         //   cmpldi cr0,r0,0x5678
3574         //   beq cr0,L6
3575         if (isUInt<32>(Imm)) {
3576           SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
3577                                              getI64Imm(Imm >> 16, dl)), 0);
3578           return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
3579                                                 getI64Imm(Imm & 0xFFFF, dl)),
3580                          0);
3581         }
3582       }
3583       Opc = PPC::CMPLD;
3584     } else if (ISD::isUnsignedIntSetCC(CC)) {
3585       if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
3586         return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3587                                               getI64Imm(Imm & 0xFFFF, dl)), 0);
3588       Opc = PPC::CMPLD;
3589     } else {
3590       int16_t SImm;
3591       if (isIntS16Immediate(RHS, SImm))
3592         return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3593                                               getI64Imm(SImm & 0xFFFF, dl)),
3594                          0);
3595       Opc = PPC::CMPD;
3596     }
3597   } else if (LHS.getValueType() == MVT::f32) {
3598     Opc = PPC::FCMPUS;
3599   } else {
3600     assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
3601     Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
3602   }
3603   return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
3604 }
3605 
3606 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
3607   switch (CC) {
3608   case ISD::SETUEQ:
3609   case ISD::SETONE:
3610   case ISD::SETOLE:
3611   case ISD::SETOGE:
3612     llvm_unreachable("Should be lowered by legalize!");
3613   default: llvm_unreachable("Unknown condition!");
3614   case ISD::SETOEQ:
3615   case ISD::SETEQ:  return PPC::PRED_EQ;
3616   case ISD::SETUNE:
3617   case ISD::SETNE:  return PPC::PRED_NE;
3618   case ISD::SETOLT:
3619   case ISD::SETLT:  return PPC::PRED_LT;
3620   case ISD::SETULE:
3621   case ISD::SETLE:  return PPC::PRED_LE;
3622   case ISD::SETOGT:
3623   case ISD::SETGT:  return PPC::PRED_GT;
3624   case ISD::SETUGE:
3625   case ISD::SETGE:  return PPC::PRED_GE;
3626   case ISD::SETO:   return PPC::PRED_NU;
3627   case ISD::SETUO:  return PPC::PRED_UN;
3628     // These two are invalid for floating point.  Assume we have int.
3629   case ISD::SETULT: return PPC::PRED_LT;
3630   case ISD::SETUGT: return PPC::PRED_GT;
3631   }
3632 }
3633 
3634 /// getCRIdxForSetCC - Return the index of the condition register field
3635 /// associated with the SetCC condition, and whether or not the field is
3636 /// treated as inverted.  That is, lt = 0; ge = 0 inverted.
3637 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
3638   Invert = false;
3639   switch (CC) {
3640   default: llvm_unreachable("Unknown condition!");
3641   case ISD::SETOLT:
3642   case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
3643   case ISD::SETOGT:
3644   case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
3645   case ISD::SETOEQ:
3646   case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
3647   case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
3648   case ISD::SETUGE:
3649   case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
3650   case ISD::SETULE:
3651   case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
3652   case ISD::SETUNE:
3653   case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
3654   case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
3655   case ISD::SETUEQ:
3656   case ISD::SETOGE:
3657   case ISD::SETOLE:
3658   case ISD::SETONE:
3659     llvm_unreachable("Invalid branch code: should be expanded by legalize");
3660   // These are invalid for floating point.  Assume integer.
3661   case ISD::SETULT: return 0;
3662   case ISD::SETUGT: return 1;
3663   }
3664 }
3665 
3666 // getVCmpInst: return the vector compare instruction for the specified
3667 // vector type and condition code. Since this is for altivec specific code,
3668 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
3669 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
3670                                 bool HasVSX, bool &Swap, bool &Negate) {
3671   Swap = false;
3672   Negate = false;
3673 
3674   if (VecVT.isFloatingPoint()) {
3675     /* Handle some cases by swapping input operands.  */
3676     switch (CC) {
3677       case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
3678       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3679       case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
3680       case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
3681       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3682       case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
3683       default: break;
3684     }
3685     /* Handle some cases by negating the result.  */
3686     switch (CC) {
3687       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3688       case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
3689       case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
3690       case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
3691       default: break;
3692     }
3693     /* We have instructions implementing the remaining cases.  */
3694     switch (CC) {
3695       case ISD::SETEQ:
3696       case ISD::SETOEQ:
3697         if (VecVT == MVT::v4f32)
3698           return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
3699         else if (VecVT == MVT::v2f64)
3700           return PPC::XVCMPEQDP;
3701         break;
3702       case ISD::SETGT:
3703       case ISD::SETOGT:
3704         if (VecVT == MVT::v4f32)
3705           return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
3706         else if (VecVT == MVT::v2f64)
3707           return PPC::XVCMPGTDP;
3708         break;
3709       case ISD::SETGE:
3710       case ISD::SETOGE:
3711         if (VecVT == MVT::v4f32)
3712           return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
3713         else if (VecVT == MVT::v2f64)
3714           return PPC::XVCMPGEDP;
3715         break;
3716       default:
3717         break;
3718     }
3719     llvm_unreachable("Invalid floating-point vector compare condition");
3720   } else {
3721     /* Handle some cases by swapping input operands.  */
3722     switch (CC) {
3723       case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
3724       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3725       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3726       case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
3727       default: break;
3728     }
3729     /* Handle some cases by negating the result.  */
3730     switch (CC) {
3731       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3732       case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
3733       case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
3734       case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
3735       default: break;
3736     }
3737     /* We have instructions implementing the remaining cases.  */
3738     switch (CC) {
3739       case ISD::SETEQ:
3740       case ISD::SETUEQ:
3741         if (VecVT == MVT::v16i8)
3742           return PPC::VCMPEQUB;
3743         else if (VecVT == MVT::v8i16)
3744           return PPC::VCMPEQUH;
3745         else if (VecVT == MVT::v4i32)
3746           return PPC::VCMPEQUW;
3747         else if (VecVT == MVT::v2i64)
3748           return PPC::VCMPEQUD;
3749         break;
3750       case ISD::SETGT:
3751         if (VecVT == MVT::v16i8)
3752           return PPC::VCMPGTSB;
3753         else if (VecVT == MVT::v8i16)
3754           return PPC::VCMPGTSH;
3755         else if (VecVT == MVT::v4i32)
3756           return PPC::VCMPGTSW;
3757         else if (VecVT == MVT::v2i64)
3758           return PPC::VCMPGTSD;
3759         break;
3760       case ISD::SETUGT:
3761         if (VecVT == MVT::v16i8)
3762           return PPC::VCMPGTUB;
3763         else if (VecVT == MVT::v8i16)
3764           return PPC::VCMPGTUH;
3765         else if (VecVT == MVT::v4i32)
3766           return PPC::VCMPGTUW;
3767         else if (VecVT == MVT::v2i64)
3768           return PPC::VCMPGTUD;
3769         break;
3770       default:
3771         break;
3772     }
3773     llvm_unreachable("Invalid integer vector compare condition");
3774   }
3775 }
3776 
3777 bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
3778   SDLoc dl(N);
3779   unsigned Imm;
3780   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3781   EVT PtrVT =
3782       CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
3783   bool isPPC64 = (PtrVT == MVT::i64);
3784 
3785   if (!PPCSubTarget->useCRBits() &&
3786       isInt32Immediate(N->getOperand(1), Imm)) {
3787     // We can codegen setcc op, imm very efficiently compared to a brcond.
3788     // Check for those cases here.
3789     // setcc op, 0
3790     if (Imm == 0) {
3791       SDValue Op = N->getOperand(0);
3792       switch (CC) {
3793       default: break;
3794       case ISD::SETEQ: {
3795         Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
3796         SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
3797                           getI32Imm(31, dl) };
3798         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3799         return true;
3800       }
3801       case ISD::SETNE: {
3802         if (isPPC64) break;
3803         SDValue AD =
3804           SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
3805                                          Op, getI32Imm(~0U, dl)), 0);
3806         CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
3807         return true;
3808       }
3809       case ISD::SETLT: {
3810         SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
3811                           getI32Imm(31, dl) };
3812         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3813         return true;
3814       }
3815       case ISD::SETGT: {
3816         SDValue T =
3817           SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
3818         T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
3819         SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
3820                           getI32Imm(31, dl) };
3821         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3822         return true;
3823       }
3824       }
3825     } else if (Imm == ~0U) {        // setcc op, -1
3826       SDValue Op = N->getOperand(0);
3827       switch (CC) {
3828       default: break;
3829       case ISD::SETEQ:
3830         if (isPPC64) break;
3831         Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
3832                                             Op, getI32Imm(1, dl)), 0);
3833         CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
3834                              SDValue(CurDAG->getMachineNode(PPC::LI, dl,
3835                                                             MVT::i32,
3836                                                             getI32Imm(0, dl)),
3837                                      0), Op.getValue(1));
3838         return true;
3839       case ISD::SETNE: {
3840         if (isPPC64) break;
3841         Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
3842         SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
3843                                             Op, getI32Imm(~0U, dl));
3844         CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
3845                              SDValue(AD, 1));
3846         return true;
3847       }
3848       case ISD::SETLT: {
3849         SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
3850                                                     getI32Imm(1, dl)), 0);
3851         SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
3852                                                     Op), 0);
3853         SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
3854                           getI32Imm(31, dl) };
3855         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3856         return true;
3857       }
3858       case ISD::SETGT: {
3859         SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
3860                           getI32Imm(31, dl) };
3861         Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
3862         CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
3863         return true;
3864       }
3865       }
3866     }
3867   }
3868 
3869   SDValue LHS = N->getOperand(0);
3870   SDValue RHS = N->getOperand(1);
3871 
3872   // Altivec Vector compare instructions do not set any CR register by default and
3873   // vector compare operations return the same type as the operands.
3874   if (LHS.getValueType().isVector()) {
3875     if (PPCSubTarget->hasQPX())
3876       return false;
3877 
3878     EVT VecVT = LHS.getValueType();
3879     bool Swap, Negate;
3880     unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
3881                                         PPCSubTarget->hasVSX(), Swap, Negate);
3882     if (Swap)
3883       std::swap(LHS, RHS);
3884 
3885     EVT ResVT = VecVT.changeVectorElementTypeToInteger();
3886     if (Negate) {
3887       SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
3888       CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
3889                            ResVT, VCmp, VCmp);
3890       return true;
3891     }
3892 
3893     CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
3894     return true;
3895   }
3896 
3897   if (PPCSubTarget->useCRBits())
3898     return false;
3899 
3900   bool Inv;
3901   unsigned Idx = getCRIdxForSetCC(CC, Inv);
3902   SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
3903   SDValue IntCR;
3904 
3905   // Force the ccreg into CR7.
3906   SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
3907 
3908   SDValue InFlag(nullptr, 0);  // Null incoming flag value.
3909   CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
3910                                InFlag).getValue(1);
3911 
3912   IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
3913                                          CCReg), 0);
3914 
3915   SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
3916                       getI32Imm(31, dl), getI32Imm(31, dl) };
3917   if (!Inv) {
3918     CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3919     return true;
3920   }
3921 
3922   // Get the specified bit.
3923   SDValue Tmp =
3924     SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
3925   CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
3926   return true;
3927 }
3928 
3929 /// Does this node represent a load/store node whose address can be represented
3930 /// with a register plus an immediate that's a multiple of \p Val:
3931 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
3932   LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
3933   StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
3934   SDValue AddrOp;
3935   if (LDN)
3936     AddrOp = LDN->getOperand(1);
3937   else if (STN)
3938     AddrOp = STN->getOperand(2);
3939 
3940   short Imm = 0;
3941   if (AddrOp.getOpcode() == ISD::ADD) {
3942     // If op0 is a frame index that is under aligned, we can't do it either,
3943     // because it is translated to r31 or r1 + slot + offset. We won't know the
3944     // slot number until the stack frame is finalized.
3945     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(AddrOp.getOperand(0))) {
3946       const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
3947       unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
3948       if ((SlotAlign % Val) != 0)
3949         return false;
3950     }
3951     return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
3952   }
3953 
3954   // If the address comes from the outside, the offset will be zero.
3955   return AddrOp.getOpcode() == ISD::CopyFromReg;
3956 }
3957 
3958 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
3959   // Transfer memoperands.
3960   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3961   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
3962   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
3963 }
3964 
3965 // Select - Convert the specified operand from a target-independent to a
3966 // target-specific node if it hasn't already been changed.
3967 void PPCDAGToDAGISel::Select(SDNode *N) {
3968   SDLoc dl(N);
3969   if (N->isMachineOpcode()) {
3970     N->setNodeId(-1);
3971     return;   // Already selected.
3972   }
3973 
3974   // In case any misguided DAG-level optimizations form an ADD with a
3975   // TargetConstant operand, crash here instead of miscompiling (by selecting
3976   // an r+r add instead of some kind of r+i add).
3977   if (N->getOpcode() == ISD::ADD &&
3978       N->getOperand(1).getOpcode() == ISD::TargetConstant)
3979     llvm_unreachable("Invalid ADD with TargetConstant operand");
3980 
3981   // Try matching complex bit permutations before doing anything else.
3982   if (tryBitPermutation(N))
3983     return;
3984 
3985   // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
3986   if (tryIntCompareInGPR(N))
3987     return;
3988 
3989   switch (N->getOpcode()) {
3990   default: break;
3991 
3992   case ISD::Constant:
3993     if (N->getValueType(0) == MVT::i64) {
3994       ReplaceNode(N, selectI64Imm(CurDAG, N));
3995       return;
3996     }
3997     break;
3998 
3999   case ISD::SETCC:
4000     if (trySETCC(N))
4001       return;
4002     break;
4003 
4004   case PPCISD::CALL: {
4005     const Module *M = MF->getFunction().getParent();
4006 
4007     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4008         !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
4009         M->getPICLevel() == PICLevel::SmallPIC)
4010       break;
4011 
4012     SDValue Op = N->getOperand(1);
4013 
4014     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4015       if (GA->getTargetFlags() == PPCII::MO_PLT)
4016         getGlobalBaseReg();
4017     }
4018     else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
4019       if (ES->getTargetFlags() == PPCII::MO_PLT)
4020         getGlobalBaseReg();
4021     }
4022   }
4023     break;
4024 
4025   case PPCISD::GlobalBaseReg:
4026     ReplaceNode(N, getGlobalBaseReg());
4027     return;
4028 
4029   case ISD::FrameIndex:
4030     selectFrameIndex(N, N);
4031     return;
4032 
4033   case PPCISD::MFOCRF: {
4034     SDValue InFlag = N->getOperand(1);
4035     ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
4036                                           N->getOperand(0), InFlag));
4037     return;
4038   }
4039 
4040   case PPCISD::READ_TIME_BASE:
4041     ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
4042                                           MVT::Other, N->getOperand(0)));
4043     return;
4044 
4045   case PPCISD::SRA_ADDZE: {
4046     SDValue N0 = N->getOperand(0);
4047     SDValue ShiftAmt =
4048       CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
4049                                   getConstantIntValue(), dl,
4050                                   N->getValueType(0));
4051     if (N->getValueType(0) == MVT::i64) {
4052       SDNode *Op =
4053         CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
4054                                N0, ShiftAmt);
4055       CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
4056                            SDValue(Op, 1));
4057       return;
4058     } else {
4059       assert(N->getValueType(0) == MVT::i32 &&
4060              "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4061       SDNode *Op =
4062         CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
4063                                N0, ShiftAmt);
4064       CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
4065                            SDValue(Op, 1));
4066       return;
4067     }
4068   }
4069 
4070   case ISD::STORE: {
4071     // Change TLS initial-exec D-form stores to X-form stores.
4072     StoreSDNode *ST = cast<StoreSDNode>(N);
4073     if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
4074         ST->getAddressingMode() != ISD::PRE_INC)
4075       if (tryTLSXFormStore(ST))
4076         return;
4077     break;
4078   }
4079   case ISD::LOAD: {
4080     // Handle preincrement loads.
4081     LoadSDNode *LD = cast<LoadSDNode>(N);
4082     EVT LoadedVT = LD->getMemoryVT();
4083 
4084     // Normal loads are handled by code generated from the .td file.
4085     if (LD->getAddressingMode() != ISD::PRE_INC) {
4086       // Change TLS initial-exec D-form loads to X-form loads.
4087       if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
4088         if (tryTLSXFormLoad(LD))
4089           return;
4090       break;
4091     }
4092 
4093     SDValue Offset = LD->getOffset();
4094     if (Offset.getOpcode() == ISD::TargetConstant ||
4095         Offset.getOpcode() == ISD::TargetGlobalAddress) {
4096 
4097       unsigned Opcode;
4098       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4099       if (LD->getValueType(0) != MVT::i64) {
4100         // Handle PPC32 integer and normal FP loads.
4101         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4102         switch (LoadedVT.getSimpleVT().SimpleTy) {
4103           default: llvm_unreachable("Invalid PPC load type!");
4104           case MVT::f64: Opcode = PPC::LFDU; break;
4105           case MVT::f32: Opcode = PPC::LFSU; break;
4106           case MVT::i32: Opcode = PPC::LWZU; break;
4107           case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
4108           case MVT::i1:
4109           case MVT::i8:  Opcode = PPC::LBZU; break;
4110         }
4111       } else {
4112         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4113         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4114         switch (LoadedVT.getSimpleVT().SimpleTy) {
4115           default: llvm_unreachable("Invalid PPC load type!");
4116           case MVT::i64: Opcode = PPC::LDU; break;
4117           case MVT::i32: Opcode = PPC::LWZU8; break;
4118           case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
4119           case MVT::i1:
4120           case MVT::i8:  Opcode = PPC::LBZU8; break;
4121         }
4122       }
4123 
4124       SDValue Chain = LD->getChain();
4125       SDValue Base = LD->getBasePtr();
4126       SDValue Ops[] = { Offset, Base, Chain };
4127       SDNode *MN = CurDAG->getMachineNode(
4128           Opcode, dl, LD->getValueType(0),
4129           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4130       transferMemOperands(N, MN);
4131       ReplaceNode(N, MN);
4132       return;
4133     } else {
4134       unsigned Opcode;
4135       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4136       if (LD->getValueType(0) != MVT::i64) {
4137         // Handle PPC32 integer and normal FP loads.
4138         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4139         switch (LoadedVT.getSimpleVT().SimpleTy) {
4140           default: llvm_unreachable("Invalid PPC load type!");
4141           case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
4142           case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
4143           case MVT::f64: Opcode = PPC::LFDUX; break;
4144           case MVT::f32: Opcode = PPC::LFSUX; break;
4145           case MVT::i32: Opcode = PPC::LWZUX; break;
4146           case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
4147           case MVT::i1:
4148           case MVT::i8:  Opcode = PPC::LBZUX; break;
4149         }
4150       } else {
4151         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4152         assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
4153                "Invalid sext update load");
4154         switch (LoadedVT.getSimpleVT().SimpleTy) {
4155           default: llvm_unreachable("Invalid PPC load type!");
4156           case MVT::i64: Opcode = PPC::LDUX; break;
4157           case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
4158           case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
4159           case MVT::i1:
4160           case MVT::i8:  Opcode = PPC::LBZUX8; break;
4161         }
4162       }
4163 
4164       SDValue Chain = LD->getChain();
4165       SDValue Base = LD->getBasePtr();
4166       SDValue Ops[] = { Base, Offset, Chain };
4167       SDNode *MN = CurDAG->getMachineNode(
4168           Opcode, dl, LD->getValueType(0),
4169           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4170       transferMemOperands(N, MN);
4171       ReplaceNode(N, MN);
4172       return;
4173     }
4174   }
4175 
4176   case ISD::AND: {
4177     unsigned Imm, Imm2, SH, MB, ME;
4178     uint64_t Imm64;
4179 
4180     // If this is an and of a value rotated between 0 and 31 bits and then and'd
4181     // with a mask, emit rlwinm
4182     if (isInt32Immediate(N->getOperand(1), Imm) &&
4183         isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
4184       SDValue Val = N->getOperand(0).getOperand(0);
4185       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4186                         getI32Imm(ME, dl) };
4187       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4188       return;
4189     }
4190     // If this is just a masked value where the input is not handled above, and
4191     // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4192     if (isInt32Immediate(N->getOperand(1), Imm) &&
4193         isRunOfOnes(Imm, MB, ME) &&
4194         N->getOperand(0).getOpcode() != ISD::ROTL) {
4195       SDValue Val = N->getOperand(0);
4196       SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4197                         getI32Imm(ME, dl) };
4198       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4199       return;
4200     }
4201     // If this is a 64-bit zero-extension mask, emit rldicl.
4202     if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4203         isMask_64(Imm64)) {
4204       SDValue Val = N->getOperand(0);
4205       MB = 64 - countTrailingOnes(Imm64);
4206       SH = 0;
4207 
4208       if (Val.getOpcode() == ISD::ANY_EXTEND) {
4209         auto Op0 = Val.getOperand(0);
4210         if ( Op0.getOpcode() == ISD::SRL &&
4211            isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
4212 
4213            auto ResultType = Val.getNode()->getValueType(0);
4214            auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
4215                                                ResultType);
4216            SDValue IDVal (ImDef, 0);
4217 
4218            Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
4219                          ResultType, IDVal, Op0.getOperand(0),
4220                          getI32Imm(1, dl)), 0);
4221            SH = 64 - Imm;
4222         }
4223       }
4224 
4225       // If the operand is a logical right shift, we can fold it into this
4226       // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4227       // for n <= mb. The right shift is really a left rotate followed by a
4228       // mask, and this mask is a more-restrictive sub-mask of the mask implied
4229       // by the shift.
4230       if (Val.getOpcode() == ISD::SRL &&
4231           isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
4232         assert(Imm < 64 && "Illegal shift amount");
4233         Val = Val.getOperand(0);
4234         SH = 64 - Imm;
4235       }
4236 
4237       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4238       CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4239       return;
4240     }
4241     // If this is a negated 64-bit zero-extension mask,
4242     // i.e. the immediate is a sequence of ones from most significant side
4243     // and all zero for remainder, we should use rldicr.
4244     if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4245         isMask_64(~Imm64)) {
4246       SDValue Val = N->getOperand(0);
4247       MB = 63 - countTrailingOnes(~Imm64);
4248       SH = 0;
4249       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4250       CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4251       return;
4252     }
4253 
4254     // AND X, 0 -> 0, not "rlwinm 32".
4255     if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
4256       ReplaceUses(SDValue(N, 0), N->getOperand(1));
4257       return;
4258     }
4259     // ISD::OR doesn't get all the bitfield insertion fun.
4260     // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4261     // bitfield insert.
4262     if (isInt32Immediate(N->getOperand(1), Imm) &&
4263         N->getOperand(0).getOpcode() == ISD::OR &&
4264         isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
4265       // The idea here is to check whether this is equivalent to:
4266       //   (c1 & m) | (x & ~m)
4267       // where m is a run-of-ones mask. The logic here is that, for each bit in
4268       // c1 and c2:
4269       //  - if both are 1, then the output will be 1.
4270       //  - if both are 0, then the output will be 0.
4271       //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4272       //    come from x.
4273       //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4274       //    be 0.
4275       //  If that last condition is never the case, then we can form m from the
4276       //  bits that are the same between c1 and c2.
4277       unsigned MB, ME;
4278       if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
4279         SDValue Ops[] = { N->getOperand(0).getOperand(0),
4280                             N->getOperand(0).getOperand(1),
4281                             getI32Imm(0, dl), getI32Imm(MB, dl),
4282                             getI32Imm(ME, dl) };
4283         ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4284         return;
4285       }
4286     }
4287 
4288     // Other cases are autogenerated.
4289     break;
4290   }
4291   case ISD::OR: {
4292     if (N->getValueType(0) == MVT::i32)
4293       if (tryBitfieldInsert(N))
4294         return;
4295 
4296     int16_t Imm;
4297     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4298         isIntS16Immediate(N->getOperand(1), Imm)) {
4299       KnownBits LHSKnown;
4300       CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
4301 
4302       // If this is equivalent to an add, then we can fold it with the
4303       // FrameIndex calculation.
4304       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4305         selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4306         return;
4307       }
4308     }
4309 
4310     // OR with a 32-bit immediate can be handled by ori + oris
4311     // without creating an immediate in a GPR.
4312     uint64_t Imm64 = 0;
4313     bool IsPPC64 = PPCSubTarget->isPPC64();
4314     if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4315         (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4316       // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4317       uint64_t ImmHi = Imm64 >> 16;
4318       uint64_t ImmLo = Imm64 & 0xFFFF;
4319       if (ImmHi != 0 && ImmLo != 0) {
4320         SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4321                                             N->getOperand(0),
4322                                             getI16Imm(ImmLo, dl));
4323         SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4324         CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4325         return;
4326       }
4327     }
4328 
4329     // Other cases are autogenerated.
4330     break;
4331   }
4332   case ISD::XOR: {
4333     // XOR with a 32-bit immediate can be handled by xori + xoris
4334     // without creating an immediate in a GPR.
4335     uint64_t Imm64 = 0;
4336     bool IsPPC64 = PPCSubTarget->isPPC64();
4337     if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4338         (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4339       // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4340       uint64_t ImmHi = Imm64 >> 16;
4341       uint64_t ImmLo = Imm64 & 0xFFFF;
4342       if (ImmHi != 0 && ImmLo != 0) {
4343         SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4344                                             N->getOperand(0),
4345                                             getI16Imm(ImmLo, dl));
4346         SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4347         CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4348         return;
4349       }
4350     }
4351 
4352     break;
4353   }
4354   case ISD::ADD: {
4355     int16_t Imm;
4356     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4357         isIntS16Immediate(N->getOperand(1), Imm)) {
4358       selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4359       return;
4360     }
4361 
4362     break;
4363   }
4364   case ISD::SHL: {
4365     unsigned Imm, SH, MB, ME;
4366     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4367         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4368       SDValue Ops[] = { N->getOperand(0).getOperand(0),
4369                           getI32Imm(SH, dl), getI32Imm(MB, dl),
4370                           getI32Imm(ME, dl) };
4371       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4372       return;
4373     }
4374 
4375     // Other cases are autogenerated.
4376     break;
4377   }
4378   case ISD::SRL: {
4379     unsigned Imm, SH, MB, ME;
4380     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4381         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4382       SDValue Ops[] = { N->getOperand(0).getOperand(0),
4383                           getI32Imm(SH, dl), getI32Imm(MB, dl),
4384                           getI32Imm(ME, dl) };
4385       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4386       return;
4387     }
4388 
4389     // Other cases are autogenerated.
4390     break;
4391   }
4392   // FIXME: Remove this once the ANDI glue bug is fixed:
4393   case PPCISD::ANDIo_1_EQ_BIT:
4394   case PPCISD::ANDIo_1_GT_BIT: {
4395     if (!ANDIGlueBug)
4396       break;
4397 
4398     EVT InVT = N->getOperand(0).getValueType();
4399     assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4400            "Invalid input type for ANDIo_1_EQ_BIT");
4401 
4402     unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
4403     SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4404                                         N->getOperand(0),
4405                                         CurDAG->getTargetConstant(1, dl, InVT)),
4406                  0);
4407     SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4408     SDValue SRIdxVal =
4409       CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
4410                                 PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
4411 
4412     CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4413                          SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4414     return;
4415   }
4416   case ISD::SELECT_CC: {
4417     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4418     EVT PtrVT =
4419         CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4420     bool isPPC64 = (PtrVT == MVT::i64);
4421 
4422     // If this is a select of i1 operands, we'll pattern match it.
4423     if (PPCSubTarget->useCRBits() &&
4424         N->getOperand(0).getValueType() == MVT::i1)
4425       break;
4426 
4427     // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
4428     if (!isPPC64)
4429       if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
4430         if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
4431           if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
4432             if (N1C->isNullValue() && N3C->isNullValue() &&
4433                 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
4434                 // FIXME: Implement this optzn for PPC64.
4435                 N->getValueType(0) == MVT::i32) {
4436               SDNode *Tmp =
4437                 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4438                                        N->getOperand(0), getI32Imm(~0U, dl));
4439               CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
4440                                    N->getOperand(0), SDValue(Tmp, 1));
4441               return;
4442             }
4443 
4444     SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
4445 
4446     if (N->getValueType(0) == MVT::i1) {
4447       // An i1 select is: (c & t) | (!c & f).
4448       bool Inv;
4449       unsigned Idx = getCRIdxForSetCC(CC, Inv);
4450 
4451       unsigned SRI;
4452       switch (Idx) {
4453       default: llvm_unreachable("Invalid CC index");
4454       case 0: SRI = PPC::sub_lt; break;
4455       case 1: SRI = PPC::sub_gt; break;
4456       case 2: SRI = PPC::sub_eq; break;
4457       case 3: SRI = PPC::sub_un; break;
4458       }
4459 
4460       SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
4461 
4462       SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
4463                                               CCBit, CCBit), 0);
4464       SDValue C =    Inv ? NotCCBit : CCBit,
4465               NotC = Inv ? CCBit    : NotCCBit;
4466 
4467       SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4468                                            C, N->getOperand(2)), 0);
4469       SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4470                                               NotC, N->getOperand(3)), 0);
4471 
4472       CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
4473       return;
4474     }
4475 
4476     unsigned BROpc = getPredicateForSetCC(CC);
4477 
4478     unsigned SelectCCOp;
4479     if (N->getValueType(0) == MVT::i32)
4480       SelectCCOp = PPC::SELECT_CC_I4;
4481     else if (N->getValueType(0) == MVT::i64)
4482       SelectCCOp = PPC::SELECT_CC_I8;
4483     else if (N->getValueType(0) == MVT::f32)
4484       if (PPCSubTarget->hasP8Vector())
4485         SelectCCOp = PPC::SELECT_CC_VSSRC;
4486       else
4487         SelectCCOp = PPC::SELECT_CC_F4;
4488     else if (N->getValueType(0) == MVT::f64)
4489       if (PPCSubTarget->hasVSX())
4490         SelectCCOp = PPC::SELECT_CC_VSFRC;
4491       else
4492         SelectCCOp = PPC::SELECT_CC_F8;
4493     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
4494       SelectCCOp = PPC::SELECT_CC_QFRC;
4495     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
4496       SelectCCOp = PPC::SELECT_CC_QSRC;
4497     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
4498       SelectCCOp = PPC::SELECT_CC_QBRC;
4499     else if (N->getValueType(0) == MVT::v2f64 ||
4500              N->getValueType(0) == MVT::v2i64)
4501       SelectCCOp = PPC::SELECT_CC_VSRC;
4502     else
4503       SelectCCOp = PPC::SELECT_CC_VRRC;
4504 
4505     SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
4506                         getI32Imm(BROpc, dl) };
4507     CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
4508     return;
4509   }
4510   case ISD::VSELECT:
4511     if (PPCSubTarget->hasVSX()) {
4512       SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
4513       CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
4514       return;
4515     }
4516     break;
4517 
4518   case ISD::VECTOR_SHUFFLE:
4519     if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
4520                                   N->getValueType(0) == MVT::v2i64)) {
4521       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4522 
4523       SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
4524               Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
4525       unsigned DM[2];
4526 
4527       for (int i = 0; i < 2; ++i)
4528         if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
4529           DM[i] = 0;
4530         else
4531           DM[i] = 1;
4532 
4533       if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
4534           Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
4535           isa<LoadSDNode>(Op1.getOperand(0))) {
4536         LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
4537         SDValue Base, Offset;
4538 
4539         if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
4540             (LD->getMemoryVT() == MVT::f64 ||
4541              LD->getMemoryVT() == MVT::i64) &&
4542             SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
4543           SDValue Chain = LD->getChain();
4544           SDValue Ops[] = { Base, Offset, Chain };
4545           MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
4546           MemOp[0] = LD->getMemOperand();
4547           SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
4548                                               N->getValueType(0), Ops);
4549           cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
4550           return;
4551         }
4552       }
4553 
4554       // For little endian, we must swap the input operands and adjust
4555       // the mask elements (reverse and invert them).
4556       if (PPCSubTarget->isLittleEndian()) {
4557         std::swap(Op1, Op2);
4558         unsigned tmp = DM[0];
4559         DM[0] = 1 - DM[1];
4560         DM[1] = 1 - tmp;
4561       }
4562 
4563       SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
4564                                               MVT::i32);
4565       SDValue Ops[] = { Op1, Op2, DMV };
4566       CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
4567       return;
4568     }
4569 
4570     break;
4571   case PPCISD::BDNZ:
4572   case PPCISD::BDZ: {
4573     bool IsPPC64 = PPCSubTarget->isPPC64();
4574     SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
4575     CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
4576                                 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
4577                                 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
4578                          MVT::Other, Ops);
4579     return;
4580   }
4581   case PPCISD::COND_BRANCH: {
4582     // Op #0 is the Chain.
4583     // Op #1 is the PPC::PRED_* number.
4584     // Op #2 is the CR#
4585     // Op #3 is the Dest MBB
4586     // Op #4 is the Flag.
4587     // Prevent PPC::PRED_* from being selected into LI.
4588     unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4589     if (EnableBranchHint)
4590       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
4591 
4592     SDValue Pred = getI32Imm(PCC, dl);
4593     SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
4594       N->getOperand(0), N->getOperand(4) };
4595     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4596     return;
4597   }
4598   case ISD::BR_CC: {
4599     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4600     unsigned PCC = getPredicateForSetCC(CC);
4601 
4602     if (N->getOperand(2).getValueType() == MVT::i1) {
4603       unsigned Opc;
4604       bool Swap;
4605       switch (PCC) {
4606       default: llvm_unreachable("Unexpected Boolean-operand predicate");
4607       case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
4608       case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
4609       case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
4610       case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
4611       case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
4612       case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
4613       }
4614 
4615       SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
4616                                              N->getOperand(Swap ? 3 : 2),
4617                                              N->getOperand(Swap ? 2 : 3)), 0);
4618       CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
4619                            N->getOperand(0));
4620       return;
4621     }
4622 
4623     if (EnableBranchHint)
4624       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
4625 
4626     SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
4627     SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
4628                         N->getOperand(4), N->getOperand(0) };
4629     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4630     return;
4631   }
4632   case ISD::BRIND: {
4633     // FIXME: Should custom lower this.
4634     SDValue Chain = N->getOperand(0);
4635     SDValue Target = N->getOperand(1);
4636     unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
4637     unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
4638     Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
4639                                            Chain), 0);
4640     CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
4641     return;
4642   }
4643   case PPCISD::TOC_ENTRY: {
4644     assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
4645             "Only supported for 64-bit ABI and 32-bit SVR4");
4646     if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
4647       SDValue GA = N->getOperand(0);
4648       SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
4649                                           N->getOperand(1));
4650       transferMemOperands(N, MN);
4651       ReplaceNode(N, MN);
4652       return;
4653     }
4654 
4655     // For medium and large code model, we generate two instructions as
4656     // described below.  Otherwise we allow SelectCodeCommon to handle this,
4657     // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
4658     CodeModel::Model CModel = TM.getCodeModel();
4659     if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
4660       break;
4661 
4662     // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
4663     // If it must be toc-referenced according to PPCSubTarget, we generate:
4664     //   LDtocL(@sym, ADDIStocHA(%x2, @sym))
4665     // Otherwise we generate:
4666     //   ADDItocL(ADDIStocHA(%x2, @sym), @sym)
4667     SDValue GA = N->getOperand(0);
4668     SDValue TOCbase = N->getOperand(1);
4669     SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
4670                                          TOCbase, GA);
4671 
4672     if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
4673         CModel == CodeModel::Large) {
4674       SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
4675                                           SDValue(Tmp, 0));
4676       transferMemOperands(N, MN);
4677       ReplaceNode(N, MN);
4678       return;
4679     }
4680 
4681     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
4682       const GlobalValue *GV = G->getGlobal();
4683       unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
4684       if (GVFlags & PPCII::MO_NLP_FLAG) {
4685         SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
4686                                             SDValue(Tmp, 0));
4687         transferMemOperands(N, MN);
4688         ReplaceNode(N, MN);
4689         return;
4690       }
4691     }
4692 
4693     ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
4694                                           SDValue(Tmp, 0), GA));
4695     return;
4696   }
4697   case PPCISD::PPC32_PICGOT:
4698     // Generate a PIC-safe GOT reference.
4699     assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
4700       "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
4701     CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
4702                          PPCLowering->getPointerTy(CurDAG->getDataLayout()),
4703                          MVT::i32);
4704     return;
4705 
4706   case PPCISD::VADD_SPLAT: {
4707     // This expands into one of three sequences, depending on whether
4708     // the first operand is odd or even, positive or negative.
4709     assert(isa<ConstantSDNode>(N->getOperand(0)) &&
4710            isa<ConstantSDNode>(N->getOperand(1)) &&
4711            "Invalid operand on VADD_SPLAT!");
4712 
4713     int Elt     = N->getConstantOperandVal(0);
4714     int EltSize = N->getConstantOperandVal(1);
4715     unsigned Opc1, Opc2, Opc3;
4716     EVT VT;
4717 
4718     if (EltSize == 1) {
4719       Opc1 = PPC::VSPLTISB;
4720       Opc2 = PPC::VADDUBM;
4721       Opc3 = PPC::VSUBUBM;
4722       VT = MVT::v16i8;
4723     } else if (EltSize == 2) {
4724       Opc1 = PPC::VSPLTISH;
4725       Opc2 = PPC::VADDUHM;
4726       Opc3 = PPC::VSUBUHM;
4727       VT = MVT::v8i16;
4728     } else {
4729       assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
4730       Opc1 = PPC::VSPLTISW;
4731       Opc2 = PPC::VADDUWM;
4732       Opc3 = PPC::VSUBUWM;
4733       VT = MVT::v4i32;
4734     }
4735 
4736     if ((Elt & 1) == 0) {
4737       // Elt is even, in the range [-32,-18] + [16,30].
4738       //
4739       // Convert: VADD_SPLAT elt, size
4740       // Into:    tmp = VSPLTIS[BHW] elt
4741       //          VADDU[BHW]M tmp, tmp
4742       // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
4743       SDValue EltVal = getI32Imm(Elt >> 1, dl);
4744       SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4745       SDValue TmpVal = SDValue(Tmp, 0);
4746       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
4747       return;
4748     } else if (Elt > 0) {
4749       // Elt is odd and positive, in the range [17,31].
4750       //
4751       // Convert: VADD_SPLAT elt, size
4752       // Into:    tmp1 = VSPLTIS[BHW] elt-16
4753       //          tmp2 = VSPLTIS[BHW] -16
4754       //          VSUBU[BHW]M tmp1, tmp2
4755       SDValue EltVal = getI32Imm(Elt - 16, dl);
4756       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4757       EltVal = getI32Imm(-16, dl);
4758       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4759       ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
4760                                             SDValue(Tmp2, 0)));
4761       return;
4762     } else {
4763       // Elt is odd and negative, in the range [-31,-17].
4764       //
4765       // Convert: VADD_SPLAT elt, size
4766       // Into:    tmp1 = VSPLTIS[BHW] elt+16
4767       //          tmp2 = VSPLTIS[BHW] -16
4768       //          VADDU[BHW]M tmp1, tmp2
4769       SDValue EltVal = getI32Imm(Elt + 16, dl);
4770       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4771       EltVal = getI32Imm(-16, dl);
4772       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4773       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
4774                                             SDValue(Tmp2, 0)));
4775       return;
4776     }
4777   }
4778   }
4779 
4780   SelectCode(N);
4781 }
4782 
4783 // If the target supports the cmpb instruction, do the idiom recognition here.
4784 // We don't do this as a DAG combine because we don't want to do it as nodes
4785 // are being combined (because we might miss part of the eventual idiom). We
4786 // don't want to do it during instruction selection because we want to reuse
4787 // the logic for lowering the masking operations already part of the
4788 // instruction selector.
4789 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
4790   SDLoc dl(N);
4791 
4792   assert(N->getOpcode() == ISD::OR &&
4793          "Only OR nodes are supported for CMPB");
4794 
4795   SDValue Res;
4796   if (!PPCSubTarget->hasCMPB())
4797     return Res;
4798 
4799   if (N->getValueType(0) != MVT::i32 &&
4800       N->getValueType(0) != MVT::i64)
4801     return Res;
4802 
4803   EVT VT = N->getValueType(0);
4804 
4805   SDValue RHS, LHS;
4806   bool BytesFound[8] = {false, false, false, false, false, false, false, false};
4807   uint64_t Mask = 0, Alt = 0;
4808 
4809   auto IsByteSelectCC = [this](SDValue O, unsigned &b,
4810                                uint64_t &Mask, uint64_t &Alt,
4811                                SDValue &LHS, SDValue &RHS) {
4812     if (O.getOpcode() != ISD::SELECT_CC)
4813       return false;
4814     ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
4815 
4816     if (!isa<ConstantSDNode>(O.getOperand(2)) ||
4817         !isa<ConstantSDNode>(O.getOperand(3)))
4818       return false;
4819 
4820     uint64_t PM = O.getConstantOperandVal(2);
4821     uint64_t PAlt = O.getConstantOperandVal(3);
4822     for (b = 0; b < 8; ++b) {
4823       uint64_t Mask = UINT64_C(0xFF) << (8*b);
4824       if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
4825         break;
4826     }
4827 
4828     if (b == 8)
4829       return false;
4830     Mask |= PM;
4831     Alt  |= PAlt;
4832 
4833     if (!isa<ConstantSDNode>(O.getOperand(1)) ||
4834         O.getConstantOperandVal(1) != 0) {
4835       SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
4836       if (Op0.getOpcode() == ISD::TRUNCATE)
4837         Op0 = Op0.getOperand(0);
4838       if (Op1.getOpcode() == ISD::TRUNCATE)
4839         Op1 = Op1.getOperand(0);
4840 
4841       if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
4842           Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
4843           isa<ConstantSDNode>(Op0.getOperand(1))) {
4844 
4845         unsigned Bits = Op0.getValueSizeInBits();
4846         if (b != Bits/8-1)
4847           return false;
4848         if (Op0.getConstantOperandVal(1) != Bits-8)
4849           return false;
4850 
4851         LHS = Op0.getOperand(0);
4852         RHS = Op1.getOperand(0);
4853         return true;
4854       }
4855 
4856       // When we have small integers (i16 to be specific), the form present
4857       // post-legalization uses SETULT in the SELECT_CC for the
4858       // higher-order byte, depending on the fact that the
4859       // even-higher-order bytes are known to all be zero, for example:
4860       //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
4861       // (so when the second byte is the same, because all higher-order
4862       // bits from bytes 3 and 4 are known to be zero, the result of the
4863       // xor can be at most 255)
4864       if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
4865           isa<ConstantSDNode>(O.getOperand(1))) {
4866 
4867         uint64_t ULim = O.getConstantOperandVal(1);
4868         if (ULim != (UINT64_C(1) << b*8))
4869           return false;
4870 
4871         // Now we need to make sure that the upper bytes are known to be
4872         // zero.
4873         unsigned Bits = Op0.getValueSizeInBits();
4874         if (!CurDAG->MaskedValueIsZero(
4875                 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
4876           return false;
4877 
4878         LHS = Op0.getOperand(0);
4879         RHS = Op0.getOperand(1);
4880         return true;
4881       }
4882 
4883       return false;
4884     }
4885 
4886     if (CC != ISD::SETEQ)
4887       return false;
4888 
4889     SDValue Op = O.getOperand(0);
4890     if (Op.getOpcode() == ISD::AND) {
4891       if (!isa<ConstantSDNode>(Op.getOperand(1)))
4892         return false;
4893       if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
4894         return false;
4895 
4896       SDValue XOR = Op.getOperand(0);
4897       if (XOR.getOpcode() == ISD::TRUNCATE)
4898         XOR = XOR.getOperand(0);
4899       if (XOR.getOpcode() != ISD::XOR)
4900         return false;
4901 
4902       LHS = XOR.getOperand(0);
4903       RHS = XOR.getOperand(1);
4904       return true;
4905     } else if (Op.getOpcode() == ISD::SRL) {
4906       if (!isa<ConstantSDNode>(Op.getOperand(1)))
4907         return false;
4908       unsigned Bits = Op.getValueSizeInBits();
4909       if (b != Bits/8-1)
4910         return false;
4911       if (Op.getConstantOperandVal(1) != Bits-8)
4912         return false;
4913 
4914       SDValue XOR = Op.getOperand(0);
4915       if (XOR.getOpcode() == ISD::TRUNCATE)
4916         XOR = XOR.getOperand(0);
4917       if (XOR.getOpcode() != ISD::XOR)
4918         return false;
4919 
4920       LHS = XOR.getOperand(0);
4921       RHS = XOR.getOperand(1);
4922       return true;
4923     }
4924 
4925     return false;
4926   };
4927 
4928   SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
4929   while (!Queue.empty()) {
4930     SDValue V = Queue.pop_back_val();
4931 
4932     for (const SDValue &O : V.getNode()->ops()) {
4933       unsigned b;
4934       uint64_t M = 0, A = 0;
4935       SDValue OLHS, ORHS;
4936       if (O.getOpcode() == ISD::OR) {
4937         Queue.push_back(O);
4938       } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
4939         if (!LHS) {
4940           LHS = OLHS;
4941           RHS = ORHS;
4942           BytesFound[b] = true;
4943           Mask |= M;
4944           Alt  |= A;
4945         } else if ((LHS == ORHS && RHS == OLHS) ||
4946                    (RHS == ORHS && LHS == OLHS)) {
4947           BytesFound[b] = true;
4948           Mask |= M;
4949           Alt  |= A;
4950         } else {
4951           return Res;
4952         }
4953       } else {
4954         return Res;
4955       }
4956     }
4957   }
4958 
4959   unsigned LastB = 0, BCnt = 0;
4960   for (unsigned i = 0; i < 8; ++i)
4961     if (BytesFound[LastB]) {
4962       ++BCnt;
4963       LastB = i;
4964     }
4965 
4966   if (!LastB || BCnt < 2)
4967     return Res;
4968 
4969   // Because we'll be zero-extending the output anyway if don't have a specific
4970   // value for each input byte (via the Mask), we can 'anyext' the inputs.
4971   if (LHS.getValueType() != VT) {
4972     LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
4973     RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
4974   }
4975 
4976   Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
4977 
4978   bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
4979   if (NonTrivialMask && !Alt) {
4980     // Res = Mask & CMPB
4981     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
4982                           CurDAG->getConstant(Mask, dl, VT));
4983   } else if (Alt) {
4984     // Res = (CMPB & Mask) | (~CMPB & Alt)
4985     // Which, as suggested here:
4986     //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
4987     // can be written as:
4988     // Res = Alt ^ ((Alt ^ Mask) & CMPB)
4989     // useful because the (Alt ^ Mask) can be pre-computed.
4990     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
4991                           CurDAG->getConstant(Mask ^ Alt, dl, VT));
4992     Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
4993                           CurDAG->getConstant(Alt, dl, VT));
4994   }
4995 
4996   return Res;
4997 }
4998 
4999 // When CR bit registers are enabled, an extension of an i1 variable to a i32
5000 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5001 // involves constant materialization of a 0 or a 1 or both. If the result of
5002 // the extension is then operated upon by some operator that can be constant
5003 // folded with a constant 0 or 1, and that constant can be materialized using
5004 // only one instruction (like a zero or one), then we should fold in those
5005 // operations with the select.
5006 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
5007   if (!PPCSubTarget->useCRBits())
5008     return;
5009 
5010   if (N->getOpcode() != ISD::ZERO_EXTEND &&
5011       N->getOpcode() != ISD::SIGN_EXTEND &&
5012       N->getOpcode() != ISD::ANY_EXTEND)
5013     return;
5014 
5015   if (N->getOperand(0).getValueType() != MVT::i1)
5016     return;
5017 
5018   if (!N->hasOneUse())
5019     return;
5020 
5021   SDLoc dl(N);
5022   EVT VT = N->getValueType(0);
5023   SDValue Cond = N->getOperand(0);
5024   SDValue ConstTrue =
5025     CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
5026   SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
5027 
5028   do {
5029     SDNode *User = *N->use_begin();
5030     if (User->getNumOperands() != 2)
5031       break;
5032 
5033     auto TryFold = [this, N, User, dl](SDValue Val) {
5034       SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
5035       SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
5036       SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
5037 
5038       return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
5039                                             User->getValueType(0),
5040                                             O0.getNode(), O1.getNode());
5041     };
5042 
5043     // FIXME: When the semantics of the interaction between select and undef
5044     // are clearly defined, it may turn out to be unnecessary to break here.
5045     SDValue TrueRes = TryFold(ConstTrue);
5046     if (!TrueRes || TrueRes.isUndef())
5047       break;
5048     SDValue FalseRes = TryFold(ConstFalse);
5049     if (!FalseRes || FalseRes.isUndef())
5050       break;
5051 
5052     // For us to materialize these using one instruction, we must be able to
5053     // represent them as signed 16-bit integers.
5054     uint64_t True  = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
5055              False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
5056     if (!isInt<16>(True) || !isInt<16>(False))
5057       break;
5058 
5059     // We can replace User with a new SELECT node, and try again to see if we
5060     // can fold the select with its user.
5061     Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
5062     N = User;
5063     ConstTrue = TrueRes;
5064     ConstFalse = FalseRes;
5065   } while (N->hasOneUse());
5066 }
5067 
5068 void PPCDAGToDAGISel::PreprocessISelDAG() {
5069   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
5070   ++Position;
5071 
5072   bool MadeChange = false;
5073   while (Position != CurDAG->allnodes_begin()) {
5074     SDNode *N = &*--Position;
5075     if (N->use_empty())
5076       continue;
5077 
5078     SDValue Res;
5079     switch (N->getOpcode()) {
5080     default: break;
5081     case ISD::OR:
5082       Res = combineToCMPB(N);
5083       break;
5084     }
5085 
5086     if (!Res)
5087       foldBoolExts(Res, N);
5088 
5089     if (Res) {
5090       DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
5091       DEBUG(N->dump(CurDAG));
5092       DEBUG(dbgs() << "\nNew: ");
5093       DEBUG(Res.getNode()->dump(CurDAG));
5094       DEBUG(dbgs() << "\n");
5095 
5096       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
5097       MadeChange = true;
5098     }
5099   }
5100 
5101   if (MadeChange)
5102     CurDAG->RemoveDeadNodes();
5103 }
5104 
5105 /// PostprocessISelDAG - Perform some late peephole optimizations
5106 /// on the DAG representation.
5107 void PPCDAGToDAGISel::PostprocessISelDAG() {
5108   // Skip peepholes at -O0.
5109   if (TM.getOptLevel() == CodeGenOpt::None)
5110     return;
5111 
5112   PeepholePPC64();
5113   PeepholeCROps();
5114   PeepholePPC64ZExt();
5115 }
5116 
5117 // Check if all users of this node will become isel where the second operand
5118 // is the constant zero. If this is so, and if we can negate the condition,
5119 // then we can flip the true and false operands. This will allow the zero to
5120 // be folded with the isel so that we don't need to materialize a register
5121 // containing zero.
5122 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
5123   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5124        UI != UE; ++UI) {
5125     SDNode *User = *UI;
5126     if (!User->isMachineOpcode())
5127       return false;
5128     if (User->getMachineOpcode() != PPC::SELECT_I4 &&
5129         User->getMachineOpcode() != PPC::SELECT_I8)
5130       return false;
5131 
5132     SDNode *Op2 = User->getOperand(2).getNode();
5133     if (!Op2->isMachineOpcode())
5134       return false;
5135 
5136     if (Op2->getMachineOpcode() != PPC::LI &&
5137         Op2->getMachineOpcode() != PPC::LI8)
5138       return false;
5139 
5140     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
5141     if (!C)
5142       return false;
5143 
5144     if (!C->isNullValue())
5145       return false;
5146   }
5147 
5148   return true;
5149 }
5150 
5151 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
5152   SmallVector<SDNode *, 4> ToReplace;
5153   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5154        UI != UE; ++UI) {
5155     SDNode *User = *UI;
5156     assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
5157             User->getMachineOpcode() == PPC::SELECT_I8) &&
5158            "Must have all select users");
5159     ToReplace.push_back(User);
5160   }
5161 
5162   for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
5163        UE = ToReplace.end(); UI != UE; ++UI) {
5164     SDNode *User = *UI;
5165     SDNode *ResNode =
5166       CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
5167                              User->getValueType(0), User->getOperand(0),
5168                              User->getOperand(2),
5169                              User->getOperand(1));
5170 
5171       DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
5172       DEBUG(User->dump(CurDAG));
5173       DEBUG(dbgs() << "\nNew: ");
5174       DEBUG(ResNode->dump(CurDAG));
5175       DEBUG(dbgs() << "\n");
5176 
5177       ReplaceUses(User, ResNode);
5178   }
5179 }
5180 
5181 void PPCDAGToDAGISel::PeepholeCROps() {
5182   bool IsModified;
5183   do {
5184     IsModified = false;
5185     for (SDNode &Node : CurDAG->allnodes()) {
5186       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
5187       if (!MachineNode || MachineNode->use_empty())
5188         continue;
5189       SDNode *ResNode = MachineNode;
5190 
5191       bool Op1Set   = false, Op1Unset = false,
5192            Op1Not   = false,
5193            Op2Set   = false, Op2Unset = false,
5194            Op2Not   = false;
5195 
5196       unsigned Opcode = MachineNode->getMachineOpcode();
5197       switch (Opcode) {
5198       default: break;
5199       case PPC::CRAND:
5200       case PPC::CRNAND:
5201       case PPC::CROR:
5202       case PPC::CRXOR:
5203       case PPC::CRNOR:
5204       case PPC::CREQV:
5205       case PPC::CRANDC:
5206       case PPC::CRORC: {
5207         SDValue Op = MachineNode->getOperand(1);
5208         if (Op.isMachineOpcode()) {
5209           if (Op.getMachineOpcode() == PPC::CRSET)
5210             Op2Set = true;
5211           else if (Op.getMachineOpcode() == PPC::CRUNSET)
5212             Op2Unset = true;
5213           else if (Op.getMachineOpcode() == PPC::CRNOR &&
5214                    Op.getOperand(0) == Op.getOperand(1))
5215             Op2Not = true;
5216         }
5217         LLVM_FALLTHROUGH;
5218       }
5219       case PPC::BC:
5220       case PPC::BCn:
5221       case PPC::SELECT_I4:
5222       case PPC::SELECT_I8:
5223       case PPC::SELECT_F4:
5224       case PPC::SELECT_F8:
5225       case PPC::SELECT_QFRC:
5226       case PPC::SELECT_QSRC:
5227       case PPC::SELECT_QBRC:
5228       case PPC::SELECT_VRRC:
5229       case PPC::SELECT_VSFRC:
5230       case PPC::SELECT_VSSRC:
5231       case PPC::SELECT_VSRC: {
5232         SDValue Op = MachineNode->getOperand(0);
5233         if (Op.isMachineOpcode()) {
5234           if (Op.getMachineOpcode() == PPC::CRSET)
5235             Op1Set = true;
5236           else if (Op.getMachineOpcode() == PPC::CRUNSET)
5237             Op1Unset = true;
5238           else if (Op.getMachineOpcode() == PPC::CRNOR &&
5239                    Op.getOperand(0) == Op.getOperand(1))
5240             Op1Not = true;
5241         }
5242         }
5243         break;
5244       }
5245 
5246       bool SelectSwap = false;
5247       switch (Opcode) {
5248       default: break;
5249       case PPC::CRAND:
5250         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5251           // x & x = x
5252           ResNode = MachineNode->getOperand(0).getNode();
5253         else if (Op1Set)
5254           // 1 & y = y
5255           ResNode = MachineNode->getOperand(1).getNode();
5256         else if (Op2Set)
5257           // x & 1 = x
5258           ResNode = MachineNode->getOperand(0).getNode();
5259         else if (Op1Unset || Op2Unset)
5260           // x & 0 = 0 & y = 0
5261           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5262                                            MVT::i1);
5263         else if (Op1Not)
5264           // ~x & y = andc(y, x)
5265           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5266                                            MVT::i1, MachineNode->getOperand(1),
5267                                            MachineNode->getOperand(0).
5268                                              getOperand(0));
5269         else if (Op2Not)
5270           // x & ~y = andc(x, y)
5271           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5272                                            MVT::i1, MachineNode->getOperand(0),
5273                                            MachineNode->getOperand(1).
5274                                              getOperand(0));
5275         else if (AllUsersSelectZero(MachineNode)) {
5276           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5277                                            MVT::i1, MachineNode->getOperand(0),
5278                                            MachineNode->getOperand(1));
5279           SelectSwap = true;
5280         }
5281         break;
5282       case PPC::CRNAND:
5283         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5284           // nand(x, x) -> nor(x, x)
5285           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5286                                            MVT::i1, MachineNode->getOperand(0),
5287                                            MachineNode->getOperand(0));
5288         else if (Op1Set)
5289           // nand(1, y) -> nor(y, y)
5290           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5291                                            MVT::i1, MachineNode->getOperand(1),
5292                                            MachineNode->getOperand(1));
5293         else if (Op2Set)
5294           // nand(x, 1) -> nor(x, x)
5295           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5296                                            MVT::i1, MachineNode->getOperand(0),
5297                                            MachineNode->getOperand(0));
5298         else if (Op1Unset || Op2Unset)
5299           // nand(x, 0) = nand(0, y) = 1
5300           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5301                                            MVT::i1);
5302         else if (Op1Not)
5303           // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5304           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5305                                            MVT::i1, MachineNode->getOperand(0).
5306                                                       getOperand(0),
5307                                            MachineNode->getOperand(1));
5308         else if (Op2Not)
5309           // nand(x, ~y) = ~x | y = orc(y, x)
5310           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5311                                            MVT::i1, MachineNode->getOperand(1).
5312                                                       getOperand(0),
5313                                            MachineNode->getOperand(0));
5314         else if (AllUsersSelectZero(MachineNode)) {
5315           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5316                                            MVT::i1, MachineNode->getOperand(0),
5317                                            MachineNode->getOperand(1));
5318           SelectSwap = true;
5319         }
5320         break;
5321       case PPC::CROR:
5322         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5323           // x | x = x
5324           ResNode = MachineNode->getOperand(0).getNode();
5325         else if (Op1Set || Op2Set)
5326           // x | 1 = 1 | y = 1
5327           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5328                                            MVT::i1);
5329         else if (Op1Unset)
5330           // 0 | y = y
5331           ResNode = MachineNode->getOperand(1).getNode();
5332         else if (Op2Unset)
5333           // x | 0 = x
5334           ResNode = MachineNode->getOperand(0).getNode();
5335         else if (Op1Not)
5336           // ~x | y = orc(y, x)
5337           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5338                                            MVT::i1, MachineNode->getOperand(1),
5339                                            MachineNode->getOperand(0).
5340                                              getOperand(0));
5341         else if (Op2Not)
5342           // x | ~y = orc(x, y)
5343           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5344                                            MVT::i1, MachineNode->getOperand(0),
5345                                            MachineNode->getOperand(1).
5346                                              getOperand(0));
5347         else if (AllUsersSelectZero(MachineNode)) {
5348           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5349                                            MVT::i1, MachineNode->getOperand(0),
5350                                            MachineNode->getOperand(1));
5351           SelectSwap = true;
5352         }
5353         break;
5354       case PPC::CRXOR:
5355         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5356           // xor(x, x) = 0
5357           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5358                                            MVT::i1);
5359         else if (Op1Set)
5360           // xor(1, y) -> nor(y, y)
5361           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5362                                            MVT::i1, MachineNode->getOperand(1),
5363                                            MachineNode->getOperand(1));
5364         else if (Op2Set)
5365           // xor(x, 1) -> nor(x, x)
5366           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5367                                            MVT::i1, MachineNode->getOperand(0),
5368                                            MachineNode->getOperand(0));
5369         else if (Op1Unset)
5370           // xor(0, y) = y
5371           ResNode = MachineNode->getOperand(1).getNode();
5372         else if (Op2Unset)
5373           // xor(x, 0) = x
5374           ResNode = MachineNode->getOperand(0).getNode();
5375         else if (Op1Not)
5376           // xor(~x, y) = eqv(x, y)
5377           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5378                                            MVT::i1, MachineNode->getOperand(0).
5379                                                       getOperand(0),
5380                                            MachineNode->getOperand(1));
5381         else if (Op2Not)
5382           // xor(x, ~y) = eqv(x, y)
5383           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5384                                            MVT::i1, MachineNode->getOperand(0),
5385                                            MachineNode->getOperand(1).
5386                                              getOperand(0));
5387         else if (AllUsersSelectZero(MachineNode)) {
5388           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5389                                            MVT::i1, MachineNode->getOperand(0),
5390                                            MachineNode->getOperand(1));
5391           SelectSwap = true;
5392         }
5393         break;
5394       case PPC::CRNOR:
5395         if (Op1Set || Op2Set)
5396           // nor(1, y) -> 0
5397           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5398                                            MVT::i1);
5399         else if (Op1Unset)
5400           // nor(0, y) = ~y -> nor(y, y)
5401           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5402                                            MVT::i1, MachineNode->getOperand(1),
5403                                            MachineNode->getOperand(1));
5404         else if (Op2Unset)
5405           // nor(x, 0) = ~x
5406           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5407                                            MVT::i1, MachineNode->getOperand(0),
5408                                            MachineNode->getOperand(0));
5409         else if (Op1Not)
5410           // nor(~x, y) = andc(x, y)
5411           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5412                                            MVT::i1, MachineNode->getOperand(0).
5413                                                       getOperand(0),
5414                                            MachineNode->getOperand(1));
5415         else if (Op2Not)
5416           // nor(x, ~y) = andc(y, x)
5417           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5418                                            MVT::i1, MachineNode->getOperand(1).
5419                                                       getOperand(0),
5420                                            MachineNode->getOperand(0));
5421         else if (AllUsersSelectZero(MachineNode)) {
5422           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5423                                            MVT::i1, MachineNode->getOperand(0),
5424                                            MachineNode->getOperand(1));
5425           SelectSwap = true;
5426         }
5427         break;
5428       case PPC::CREQV:
5429         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5430           // eqv(x, x) = 1
5431           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5432                                            MVT::i1);
5433         else if (Op1Set)
5434           // eqv(1, y) = y
5435           ResNode = MachineNode->getOperand(1).getNode();
5436         else if (Op2Set)
5437           // eqv(x, 1) = x
5438           ResNode = MachineNode->getOperand(0).getNode();
5439         else if (Op1Unset)
5440           // eqv(0, y) = ~y -> nor(y, y)
5441           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5442                                            MVT::i1, MachineNode->getOperand(1),
5443                                            MachineNode->getOperand(1));
5444         else if (Op2Unset)
5445           // eqv(x, 0) = ~x
5446           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5447                                            MVT::i1, MachineNode->getOperand(0),
5448                                            MachineNode->getOperand(0));
5449         else if (Op1Not)
5450           // eqv(~x, y) = xor(x, y)
5451           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5452                                            MVT::i1, MachineNode->getOperand(0).
5453                                                       getOperand(0),
5454                                            MachineNode->getOperand(1));
5455         else if (Op2Not)
5456           // eqv(x, ~y) = xor(x, y)
5457           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5458                                            MVT::i1, MachineNode->getOperand(0),
5459                                            MachineNode->getOperand(1).
5460                                              getOperand(0));
5461         else if (AllUsersSelectZero(MachineNode)) {
5462           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5463                                            MVT::i1, MachineNode->getOperand(0),
5464                                            MachineNode->getOperand(1));
5465           SelectSwap = true;
5466         }
5467         break;
5468       case PPC::CRANDC:
5469         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5470           // andc(x, x) = 0
5471           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5472                                            MVT::i1);
5473         else if (Op1Set)
5474           // andc(1, y) = ~y
5475           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5476                                            MVT::i1, MachineNode->getOperand(1),
5477                                            MachineNode->getOperand(1));
5478         else if (Op1Unset || Op2Set)
5479           // andc(0, y) = andc(x, 1) = 0
5480           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5481                                            MVT::i1);
5482         else if (Op2Unset)
5483           // andc(x, 0) = x
5484           ResNode = MachineNode->getOperand(0).getNode();
5485         else if (Op1Not)
5486           // andc(~x, y) = ~(x | y) = nor(x, y)
5487           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5488                                            MVT::i1, MachineNode->getOperand(0).
5489                                                       getOperand(0),
5490                                            MachineNode->getOperand(1));
5491         else if (Op2Not)
5492           // andc(x, ~y) = x & y
5493           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5494                                            MVT::i1, MachineNode->getOperand(0),
5495                                            MachineNode->getOperand(1).
5496                                              getOperand(0));
5497         else if (AllUsersSelectZero(MachineNode)) {
5498           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5499                                            MVT::i1, MachineNode->getOperand(1),
5500                                            MachineNode->getOperand(0));
5501           SelectSwap = true;
5502         }
5503         break;
5504       case PPC::CRORC:
5505         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5506           // orc(x, x) = 1
5507           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5508                                            MVT::i1);
5509         else if (Op1Set || Op2Unset)
5510           // orc(1, y) = orc(x, 0) = 1
5511           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5512                                            MVT::i1);
5513         else if (Op2Set)
5514           // orc(x, 1) = x
5515           ResNode = MachineNode->getOperand(0).getNode();
5516         else if (Op1Unset)
5517           // orc(0, y) = ~y
5518           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5519                                            MVT::i1, MachineNode->getOperand(1),
5520                                            MachineNode->getOperand(1));
5521         else if (Op1Not)
5522           // orc(~x, y) = ~(x & y) = nand(x, y)
5523           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5524                                            MVT::i1, MachineNode->getOperand(0).
5525                                                       getOperand(0),
5526                                            MachineNode->getOperand(1));
5527         else if (Op2Not)
5528           // orc(x, ~y) = x | y
5529           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5530                                            MVT::i1, MachineNode->getOperand(0),
5531                                            MachineNode->getOperand(1).
5532                                              getOperand(0));
5533         else if (AllUsersSelectZero(MachineNode)) {
5534           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5535                                            MVT::i1, MachineNode->getOperand(1),
5536                                            MachineNode->getOperand(0));
5537           SelectSwap = true;
5538         }
5539         break;
5540       case PPC::SELECT_I4:
5541       case PPC::SELECT_I8:
5542       case PPC::SELECT_F4:
5543       case PPC::SELECT_F8:
5544       case PPC::SELECT_QFRC:
5545       case PPC::SELECT_QSRC:
5546       case PPC::SELECT_QBRC:
5547       case PPC::SELECT_VRRC:
5548       case PPC::SELECT_VSFRC:
5549       case PPC::SELECT_VSSRC:
5550       case PPC::SELECT_VSRC:
5551         if (Op1Set)
5552           ResNode = MachineNode->getOperand(1).getNode();
5553         else if (Op1Unset)
5554           ResNode = MachineNode->getOperand(2).getNode();
5555         else if (Op1Not)
5556           ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
5557                                            SDLoc(MachineNode),
5558                                            MachineNode->getValueType(0),
5559                                            MachineNode->getOperand(0).
5560                                              getOperand(0),
5561                                            MachineNode->getOperand(2),
5562                                            MachineNode->getOperand(1));
5563         break;
5564       case PPC::BC:
5565       case PPC::BCn:
5566         if (Op1Not)
5567           ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
5568                                                                PPC::BC,
5569                                            SDLoc(MachineNode),
5570                                            MVT::Other,
5571                                            MachineNode->getOperand(0).
5572                                              getOperand(0),
5573                                            MachineNode->getOperand(1),
5574                                            MachineNode->getOperand(2));
5575         // FIXME: Handle Op1Set, Op1Unset here too.
5576         break;
5577       }
5578 
5579       // If we're inverting this node because it is used only by selects that
5580       // we'd like to swap, then swap the selects before the node replacement.
5581       if (SelectSwap)
5582         SwapAllSelectUsers(MachineNode);
5583 
5584       if (ResNode != MachineNode) {
5585         DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
5586         DEBUG(MachineNode->dump(CurDAG));
5587         DEBUG(dbgs() << "\nNew: ");
5588         DEBUG(ResNode->dump(CurDAG));
5589         DEBUG(dbgs() << "\n");
5590 
5591         ReplaceUses(MachineNode, ResNode);
5592         IsModified = true;
5593       }
5594     }
5595     if (IsModified)
5596       CurDAG->RemoveDeadNodes();
5597   } while (IsModified);
5598 }
5599 
5600 // Gather the set of 32-bit operations that are known to have their
5601 // higher-order 32 bits zero, where ToPromote contains all such operations.
5602 static bool PeepholePPC64ZExtGather(SDValue Op32,
5603                                     SmallPtrSetImpl<SDNode *> &ToPromote) {
5604   if (!Op32.isMachineOpcode())
5605     return false;
5606 
5607   // First, check for the "frontier" instructions (those that will clear the
5608   // higher-order 32 bits.
5609 
5610   // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
5611   // around. If it does not, then these instructions will clear the
5612   // higher-order bits.
5613   if ((Op32.getMachineOpcode() == PPC::RLWINM ||
5614        Op32.getMachineOpcode() == PPC::RLWNM) &&
5615       Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
5616     ToPromote.insert(Op32.getNode());
5617     return true;
5618   }
5619 
5620   // SLW and SRW always clear the higher-order bits.
5621   if (Op32.getMachineOpcode() == PPC::SLW ||
5622       Op32.getMachineOpcode() == PPC::SRW) {
5623     ToPromote.insert(Op32.getNode());
5624     return true;
5625   }
5626 
5627   // For LI and LIS, we need the immediate to be positive (so that it is not
5628   // sign extended).
5629   if (Op32.getMachineOpcode() == PPC::LI ||
5630       Op32.getMachineOpcode() == PPC::LIS) {
5631     if (!isUInt<15>(Op32.getConstantOperandVal(0)))
5632       return false;
5633 
5634     ToPromote.insert(Op32.getNode());
5635     return true;
5636   }
5637 
5638   // LHBRX and LWBRX always clear the higher-order bits.
5639   if (Op32.getMachineOpcode() == PPC::LHBRX ||
5640       Op32.getMachineOpcode() == PPC::LWBRX) {
5641     ToPromote.insert(Op32.getNode());
5642     return true;
5643   }
5644 
5645   // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
5646   if (Op32.getMachineOpcode() == PPC::CNTLZW ||
5647       Op32.getMachineOpcode() == PPC::CNTTZW) {
5648     ToPromote.insert(Op32.getNode());
5649     return true;
5650   }
5651 
5652   // Next, check for those instructions we can look through.
5653 
5654   // Assuming the mask does not wrap around, then the higher-order bits are
5655   // taken directly from the first operand.
5656   if (Op32.getMachineOpcode() == PPC::RLWIMI &&
5657       Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
5658     SmallPtrSet<SDNode *, 16> ToPromote1;
5659     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
5660       return false;
5661 
5662     ToPromote.insert(Op32.getNode());
5663     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5664     return true;
5665   }
5666 
5667   // For OR, the higher-order bits are zero if that is true for both operands.
5668   // For SELECT_I4, the same is true (but the relevant operand numbers are
5669   // shifted by 1).
5670   if (Op32.getMachineOpcode() == PPC::OR ||
5671       Op32.getMachineOpcode() == PPC::SELECT_I4) {
5672     unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
5673     SmallPtrSet<SDNode *, 16> ToPromote1;
5674     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
5675       return false;
5676     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
5677       return false;
5678 
5679     ToPromote.insert(Op32.getNode());
5680     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5681     return true;
5682   }
5683 
5684   // For ORI and ORIS, we need the higher-order bits of the first operand to be
5685   // zero, and also for the constant to be positive (so that it is not sign
5686   // extended).
5687   if (Op32.getMachineOpcode() == PPC::ORI ||
5688       Op32.getMachineOpcode() == PPC::ORIS) {
5689     SmallPtrSet<SDNode *, 16> ToPromote1;
5690     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
5691       return false;
5692     if (!isUInt<15>(Op32.getConstantOperandVal(1)))
5693       return false;
5694 
5695     ToPromote.insert(Op32.getNode());
5696     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5697     return true;
5698   }
5699 
5700   // The higher-order bits of AND are zero if that is true for at least one of
5701   // the operands.
5702   if (Op32.getMachineOpcode() == PPC::AND) {
5703     SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
5704     bool Op0OK =
5705       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
5706     bool Op1OK =
5707       PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
5708     if (!Op0OK && !Op1OK)
5709       return false;
5710 
5711     ToPromote.insert(Op32.getNode());
5712 
5713     if (Op0OK)
5714       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5715 
5716     if (Op1OK)
5717       ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
5718 
5719     return true;
5720   }
5721 
5722   // For ANDI and ANDIS, the higher-order bits are zero if either that is true
5723   // of the first operand, or if the second operand is positive (so that it is
5724   // not sign extended).
5725   if (Op32.getMachineOpcode() == PPC::ANDIo ||
5726       Op32.getMachineOpcode() == PPC::ANDISo) {
5727     SmallPtrSet<SDNode *, 16> ToPromote1;
5728     bool Op0OK =
5729       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
5730     bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
5731     if (!Op0OK && !Op1OK)
5732       return false;
5733 
5734     ToPromote.insert(Op32.getNode());
5735 
5736     if (Op0OK)
5737       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5738 
5739     return true;
5740   }
5741 
5742   return false;
5743 }
5744 
5745 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
5746   if (!PPCSubTarget->isPPC64())
5747     return;
5748 
5749   // When we zero-extend from i32 to i64, we use a pattern like this:
5750   // def : Pat<(i64 (zext i32:$in)),
5751   //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
5752   //                   0, 32)>;
5753   // There are several 32-bit shift/rotate instructions, however, that will
5754   // clear the higher-order bits of their output, rendering the RLDICL
5755   // unnecessary. When that happens, we remove it here, and redefine the
5756   // relevant 32-bit operation to be a 64-bit operation.
5757 
5758   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
5759   ++Position;
5760 
5761   bool MadeChange = false;
5762   while (Position != CurDAG->allnodes_begin()) {
5763     SDNode *N = &*--Position;
5764     // Skip dead nodes and any non-machine opcodes.
5765     if (N->use_empty() || !N->isMachineOpcode())
5766       continue;
5767 
5768     if (N->getMachineOpcode() != PPC::RLDICL)
5769       continue;
5770 
5771     if (N->getConstantOperandVal(1) != 0 ||
5772         N->getConstantOperandVal(2) != 32)
5773       continue;
5774 
5775     SDValue ISR = N->getOperand(0);
5776     if (!ISR.isMachineOpcode() ||
5777         ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
5778       continue;
5779 
5780     if (!ISR.hasOneUse())
5781       continue;
5782 
5783     if (ISR.getConstantOperandVal(2) != PPC::sub_32)
5784       continue;
5785 
5786     SDValue IDef = ISR.getOperand(0);
5787     if (!IDef.isMachineOpcode() ||
5788         IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
5789       continue;
5790 
5791     // We now know that we're looking at a canonical i32 -> i64 zext. See if we
5792     // can get rid of it.
5793 
5794     SDValue Op32 = ISR->getOperand(1);
5795     if (!Op32.isMachineOpcode())
5796       continue;
5797 
5798     // There are some 32-bit instructions that always clear the high-order 32
5799     // bits, there are also some instructions (like AND) that we can look
5800     // through.
5801     SmallPtrSet<SDNode *, 16> ToPromote;
5802     if (!PeepholePPC64ZExtGather(Op32, ToPromote))
5803       continue;
5804 
5805     // If the ToPromote set contains nodes that have uses outside of the set
5806     // (except for the original INSERT_SUBREG), then abort the transformation.
5807     bool OutsideUse = false;
5808     for (SDNode *PN : ToPromote) {
5809       for (SDNode *UN : PN->uses()) {
5810         if (!ToPromote.count(UN) && UN != ISR.getNode()) {
5811           OutsideUse = true;
5812           break;
5813         }
5814       }
5815 
5816       if (OutsideUse)
5817         break;
5818     }
5819     if (OutsideUse)
5820       continue;
5821 
5822     MadeChange = true;
5823 
5824     // We now know that this zero extension can be removed by promoting to
5825     // nodes in ToPromote to 64-bit operations, where for operations in the
5826     // frontier of the set, we need to insert INSERT_SUBREGs for their
5827     // operands.
5828     for (SDNode *PN : ToPromote) {
5829       unsigned NewOpcode;
5830       switch (PN->getMachineOpcode()) {
5831       default:
5832         llvm_unreachable("Don't know the 64-bit variant of this instruction");
5833       case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
5834       case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
5835       case PPC::SLW:       NewOpcode = PPC::SLW8; break;
5836       case PPC::SRW:       NewOpcode = PPC::SRW8; break;
5837       case PPC::LI:        NewOpcode = PPC::LI8; break;
5838       case PPC::LIS:       NewOpcode = PPC::LIS8; break;
5839       case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
5840       case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
5841       case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
5842       case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
5843       case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
5844       case PPC::OR:        NewOpcode = PPC::OR8; break;
5845       case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
5846       case PPC::ORI:       NewOpcode = PPC::ORI8; break;
5847       case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
5848       case PPC::AND:       NewOpcode = PPC::AND8; break;
5849       case PPC::ANDIo:     NewOpcode = PPC::ANDIo8; break;
5850       case PPC::ANDISo:    NewOpcode = PPC::ANDISo8; break;
5851       }
5852 
5853       // Note: During the replacement process, the nodes will be in an
5854       // inconsistent state (some instructions will have operands with values
5855       // of the wrong type). Once done, however, everything should be right
5856       // again.
5857 
5858       SmallVector<SDValue, 4> Ops;
5859       for (const SDValue &V : PN->ops()) {
5860         if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
5861             !isa<ConstantSDNode>(V)) {
5862           SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
5863           SDNode *ReplOp =
5864             CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
5865                                    ISR.getNode()->getVTList(), ReplOpOps);
5866           Ops.push_back(SDValue(ReplOp, 0));
5867         } else {
5868           Ops.push_back(V);
5869         }
5870       }
5871 
5872       // Because all to-be-promoted nodes only have users that are other
5873       // promoted nodes (or the original INSERT_SUBREG), we can safely replace
5874       // the i32 result value type with i64.
5875 
5876       SmallVector<EVT, 2> NewVTs;
5877       SDVTList VTs = PN->getVTList();
5878       for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
5879         if (VTs.VTs[i] == MVT::i32)
5880           NewVTs.push_back(MVT::i64);
5881         else
5882           NewVTs.push_back(VTs.VTs[i]);
5883 
5884       DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
5885       DEBUG(PN->dump(CurDAG));
5886 
5887       CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
5888 
5889       DEBUG(dbgs() << "\nNew: ");
5890       DEBUG(PN->dump(CurDAG));
5891       DEBUG(dbgs() << "\n");
5892     }
5893 
5894     // Now we replace the original zero extend and its associated INSERT_SUBREG
5895     // with the value feeding the INSERT_SUBREG (which has now been promoted to
5896     // return an i64).
5897 
5898     DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
5899     DEBUG(N->dump(CurDAG));
5900     DEBUG(dbgs() << "\nNew: ");
5901     DEBUG(Op32.getNode()->dump(CurDAG));
5902     DEBUG(dbgs() << "\n");
5903 
5904     ReplaceUses(N, Op32.getNode());
5905   }
5906 
5907   if (MadeChange)
5908     CurDAG->RemoveDeadNodes();
5909 }
5910 
5911 void PPCDAGToDAGISel::PeepholePPC64() {
5912   // These optimizations are currently supported only for 64-bit SVR4.
5913   if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
5914     return;
5915 
5916   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
5917   ++Position;
5918 
5919   while (Position != CurDAG->allnodes_begin()) {
5920     SDNode *N = &*--Position;
5921     // Skip dead nodes and any non-machine opcodes.
5922     if (N->use_empty() || !N->isMachineOpcode())
5923       continue;
5924 
5925     unsigned FirstOp;
5926     unsigned StorageOpcode = N->getMachineOpcode();
5927 
5928     switch (StorageOpcode) {
5929     default: continue;
5930 
5931     case PPC::LBZ:
5932     case PPC::LBZ8:
5933     case PPC::LD:
5934     case PPC::LFD:
5935     case PPC::LFS:
5936     case PPC::LHA:
5937     case PPC::LHA8:
5938     case PPC::LHZ:
5939     case PPC::LHZ8:
5940     case PPC::LWA:
5941     case PPC::LWZ:
5942     case PPC::LWZ8:
5943       FirstOp = 0;
5944       break;
5945 
5946     case PPC::STB:
5947     case PPC::STB8:
5948     case PPC::STD:
5949     case PPC::STFD:
5950     case PPC::STFS:
5951     case PPC::STH:
5952     case PPC::STH8:
5953     case PPC::STW:
5954     case PPC::STW8:
5955       FirstOp = 1;
5956       break;
5957     }
5958 
5959     // If this is a load or store with a zero offset, or within the alignment,
5960     // we may be able to fold an add-immediate into the memory operation.
5961     // The check against alignment is below, as it can't occur until we check
5962     // the arguments to N
5963     if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
5964       continue;
5965 
5966     SDValue Base = N->getOperand(FirstOp + 1);
5967     if (!Base.isMachineOpcode())
5968       continue;
5969 
5970     unsigned Flags = 0;
5971     bool ReplaceFlags = true;
5972 
5973     // When the feeding operation is an add-immediate of some sort,
5974     // determine whether we need to add relocation information to the
5975     // target flags on the immediate operand when we fold it into the
5976     // load instruction.
5977     //
5978     // For something like ADDItocL, the relocation information is
5979     // inferred from the opcode; when we process it in the AsmPrinter,
5980     // we add the necessary relocation there.  A load, though, can receive
5981     // relocation from various flavors of ADDIxxx, so we need to carry
5982     // the relocation information in the target flags.
5983     switch (Base.getMachineOpcode()) {
5984     default: continue;
5985 
5986     case PPC::ADDI8:
5987     case PPC::ADDI:
5988       // In some cases (such as TLS) the relocation information
5989       // is already in place on the operand, so copying the operand
5990       // is sufficient.
5991       ReplaceFlags = false;
5992       // For these cases, the immediate may not be divisible by 4, in
5993       // which case the fold is illegal for DS-form instructions.  (The
5994       // other cases provide aligned addresses and are always safe.)
5995       if ((StorageOpcode == PPC::LWA ||
5996            StorageOpcode == PPC::LD  ||
5997            StorageOpcode == PPC::STD) &&
5998           (!isa<ConstantSDNode>(Base.getOperand(1)) ||
5999            Base.getConstantOperandVal(1) % 4 != 0))
6000         continue;
6001       break;
6002     case PPC::ADDIdtprelL:
6003       Flags = PPCII::MO_DTPREL_LO;
6004       break;
6005     case PPC::ADDItlsldL:
6006       Flags = PPCII::MO_TLSLD_LO;
6007       break;
6008     case PPC::ADDItocL:
6009       Flags = PPCII::MO_TOC_LO;
6010       break;
6011     }
6012 
6013     SDValue ImmOpnd = Base.getOperand(1);
6014 
6015     // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
6016     // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
6017     // we might have needed different @ha relocation values for the offset
6018     // pointers).
6019     int MaxDisplacement = 7;
6020     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6021       const GlobalValue *GV = GA->getGlobal();
6022       MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
6023     }
6024 
6025     bool UpdateHBase = false;
6026     SDValue HBase = Base.getOperand(0);
6027 
6028     int Offset = N->getConstantOperandVal(FirstOp);
6029     if (ReplaceFlags) {
6030       if (Offset < 0 || Offset > MaxDisplacement) {
6031         // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
6032         // one use, then we can do this for any offset, we just need to also
6033         // update the offset (i.e. the symbol addend) on the addis also.
6034         if (Base.getMachineOpcode() != PPC::ADDItocL)
6035           continue;
6036 
6037         if (!HBase.isMachineOpcode() ||
6038             HBase.getMachineOpcode() != PPC::ADDIStocHA)
6039           continue;
6040 
6041         if (!Base.hasOneUse() || !HBase.hasOneUse())
6042           continue;
6043 
6044         SDValue HImmOpnd = HBase.getOperand(1);
6045         if (HImmOpnd != ImmOpnd)
6046           continue;
6047 
6048         UpdateHBase = true;
6049       }
6050     } else {
6051       // If we're directly folding the addend from an addi instruction, then:
6052       //  1. In general, the offset on the memory access must be zero.
6053       //  2. If the addend is a constant, then it can be combined with a
6054       //     non-zero offset, but only if the result meets the encoding
6055       //     requirements.
6056       if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
6057         Offset += C->getSExtValue();
6058 
6059         if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
6060              StorageOpcode == PPC::STD) && (Offset % 4) != 0)
6061           continue;
6062 
6063         if (!isInt<16>(Offset))
6064           continue;
6065 
6066         ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
6067                                             ImmOpnd.getValueType());
6068       } else if (Offset != 0) {
6069         continue;
6070       }
6071     }
6072 
6073     // We found an opportunity.  Reverse the operands from the add
6074     // immediate and substitute them into the load or store.  If
6075     // needed, update the target flags for the immediate operand to
6076     // reflect the necessary relocation information.
6077     DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
6078     DEBUG(Base->dump(CurDAG));
6079     DEBUG(dbgs() << "\nN: ");
6080     DEBUG(N->dump(CurDAG));
6081     DEBUG(dbgs() << "\n");
6082 
6083     // If the relocation information isn't already present on the
6084     // immediate operand, add it now.
6085     if (ReplaceFlags) {
6086       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6087         SDLoc dl(GA);
6088         const GlobalValue *GV = GA->getGlobal();
6089         // We can't perform this optimization for data whose alignment
6090         // is insufficient for the instruction encoding.
6091         if (GV->getAlignment() < 4 &&
6092             (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
6093              StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
6094           DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
6095           continue;
6096         }
6097         ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
6098       } else if (ConstantPoolSDNode *CP =
6099                  dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
6100         const Constant *C = CP->getConstVal();
6101         ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
6102                                                 CP->getAlignment(),
6103                                                 Offset, Flags);
6104       }
6105     }
6106 
6107     if (FirstOp == 1) // Store
6108       (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
6109                                        Base.getOperand(0), N->getOperand(3));
6110     else // Load
6111       (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
6112                                        N->getOperand(2));
6113 
6114     if (UpdateHBase)
6115       (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
6116                                        ImmOpnd);
6117 
6118     // The add-immediate may now be dead, in which case remove it.
6119     if (Base.getNode()->use_empty())
6120       CurDAG->RemoveDeadNode(Base.getNode());
6121   }
6122 }
6123 
6124 /// createPPCISelDag - This pass converts a legalized DAG into a
6125 /// PowerPC-specific DAG, ready for instruction scheduling.
6126 ///
6127 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
6128                                      CodeGenOpt::Level OptLevel) {
6129   return new PPCDAGToDAGISel(TM, OptLevel);
6130 }
6131