1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a pattern matching instruction selector for PowerPC,
11 // converting from a legalized dag to a PPC dag.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "MCTargetDesc/PPCMCTargetDesc.h"
16 #include "MCTargetDesc/PPCPredicates.h"
17 #include "PPC.h"
18 #include "PPCISelLowering.h"
19 #include "PPCMachineFunctionInfo.h"
20 #include "PPCSubtarget.h"
21 #include "PPCTargetMachine.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/BranchProbabilityInfo.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/SelectionDAG.h"
36 #include "llvm/CodeGen/SelectionDAGISel.h"
37 #include "llvm/CodeGen/SelectionDAGNodes.h"
38 #include "llvm/CodeGen/TargetInstrInfo.h"
39 #include "llvm/CodeGen/TargetRegisterInfo.h"
40 #include "llvm/CodeGen/ValueTypes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/Function.h"
44 #include "llvm/IR/GlobalValue.h"
45 #include "llvm/IR/InlineAsm.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
53 #include "llvm/Support/ErrorHandling.h"
54 #include "llvm/Support/KnownBits.h"
55 #include "llvm/Support/MachineValueType.h"
56 #include "llvm/Support/MathExtras.h"
57 #include "llvm/Support/raw_ostream.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <limits>
63 #include <memory>
64 #include <new>
65 #include <tuple>
66 #include <utility>
67 
68 using namespace llvm;
69 
70 #define DEBUG_TYPE "ppc-codegen"
71 
72 STATISTIC(NumSextSetcc,
73           "Number of (sext(setcc)) nodes expanded into GPR sequence.");
74 STATISTIC(NumZextSetcc,
75           "Number of (zext(setcc)) nodes expanded into GPR sequence.");
76 STATISTIC(SignExtensionsAdded,
77           "Number of sign extensions for compare inputs added.");
78 STATISTIC(ZeroExtensionsAdded,
79           "Number of zero extensions for compare inputs added.");
80 STATISTIC(NumLogicOpsOnComparison,
81           "Number of logical ops on i1 values calculated in GPR.");
82 STATISTIC(OmittedForNonExtendUses,
83           "Number of compares not eliminated as they have non-extending uses.");
84 
85 // FIXME: Remove this once the bug has been fixed!
86 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
87 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
88 
89 static cl::opt<bool>
90     UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
91                        cl::desc("use aggressive ppc isel for bit permutations"),
92                        cl::Hidden);
93 static cl::opt<bool> BPermRewriterNoMasking(
94     "ppc-bit-perm-rewriter-stress-rotates",
95     cl::desc("stress rotate selection in aggressive ppc isel for "
96              "bit permutations"),
97     cl::Hidden);
98 
99 static cl::opt<bool> EnableBranchHint(
100   "ppc-use-branch-hint", cl::init(true),
101     cl::desc("Enable static hinting of branches on ppc"),
102     cl::Hidden);
103 
104 static cl::opt<bool> EnableTLSOpt(
105   "ppc-tls-opt", cl::init(true),
106     cl::desc("Enable tls optimization peephole"),
107     cl::Hidden);
108 
109 enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
110   ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
111   ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
112 
113 static cl::opt<ICmpInGPRType> CmpInGPR(
114   "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
115   cl::desc("Specify the types of comparisons to emit GPR-only code for."),
116   cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
117              clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
118              clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
119              clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
120              clEnumValN(ICGPR_NonExtIn, "nonextin",
121                         "Only comparisons where inputs don't need [sz]ext."),
122              clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
123              clEnumValN(ICGPR_ZextI32, "zexti32",
124                         "Only i32 comparisons with zext result."),
125              clEnumValN(ICGPR_ZextI64, "zexti64",
126                         "Only i64 comparisons with zext result."),
127              clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
128              clEnumValN(ICGPR_SextI32, "sexti32",
129                         "Only i32 comparisons with sext result."),
130              clEnumValN(ICGPR_SextI64, "sexti64",
131                         "Only i64 comparisons with sext result.")));
132 namespace {
133 
134   //===--------------------------------------------------------------------===//
135   /// PPCDAGToDAGISel - PPC specific code to select PPC machine
136   /// instructions for SelectionDAG operations.
137   ///
138   class PPCDAGToDAGISel : public SelectionDAGISel {
139     const PPCTargetMachine &TM;
140     const PPCSubtarget *PPCSubTarget;
141     const PPCTargetLowering *PPCLowering;
142     unsigned GlobalBaseReg;
143 
144   public:
145     explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
146         : SelectionDAGISel(tm, OptLevel), TM(tm) {}
147 
148     bool runOnMachineFunction(MachineFunction &MF) override {
149       // Make sure we re-emit a set of the global base reg if necessary
150       GlobalBaseReg = 0;
151       PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
152       PPCLowering = PPCSubTarget->getTargetLowering();
153       SelectionDAGISel::runOnMachineFunction(MF);
154 
155       if (!PPCSubTarget->isSVR4ABI())
156         InsertVRSaveCode(MF);
157 
158       return true;
159     }
160 
161     void PreprocessISelDAG() override;
162     void PostprocessISelDAG() override;
163 
164     /// getI16Imm - Return a target constant with the specified value, of type
165     /// i16.
166     inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
167       return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
168     }
169 
170     /// getI32Imm - Return a target constant with the specified value, of type
171     /// i32.
172     inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
173       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
174     }
175 
176     /// getI64Imm - Return a target constant with the specified value, of type
177     /// i64.
178     inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
179       return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
180     }
181 
182     /// getSmallIPtrImm - Return a target constant of pointer type.
183     inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
184       return CurDAG->getTargetConstant(
185           Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
186     }
187 
188     /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
189     /// rotate and mask opcode and mask operation.
190     static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
191                                 unsigned &SH, unsigned &MB, unsigned &ME);
192 
193     /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
194     /// base register.  Return the virtual register that holds this value.
195     SDNode *getGlobalBaseReg();
196 
197     void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
198 
199     // Select - Convert the specified operand from a target-independent to a
200     // target-specific node if it hasn't already been changed.
201     void Select(SDNode *N) override;
202 
203     bool tryBitfieldInsert(SDNode *N);
204     bool tryBitPermutation(SDNode *N);
205     bool tryIntCompareInGPR(SDNode *N);
206 
207     // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
208     // an X-Form load instruction with the offset being a relocation coming from
209     // the PPCISD::ADD_TLS.
210     bool tryTLSXFormLoad(LoadSDNode *N);
211     // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
212     // an X-Form store instruction with the offset being a relocation coming from
213     // the PPCISD::ADD_TLS.
214     bool tryTLSXFormStore(StoreSDNode *N);
215     /// SelectCC - Select a comparison of the specified values with the
216     /// specified condition code, returning the CR# of the expression.
217     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
218                      const SDLoc &dl);
219 
220     /// SelectAddrImm - Returns true if the address N can be represented by
221     /// a base register plus a signed 16-bit displacement [r+imm].
222     bool SelectAddrImm(SDValue N, SDValue &Disp,
223                        SDValue &Base) {
224       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
225     }
226 
227     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
228     /// immediate field.  Note that the operand at this point is already the
229     /// result of a prior SelectAddressRegImm call.
230     bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
231       if (N.getOpcode() == ISD::TargetConstant ||
232           N.getOpcode() == ISD::TargetGlobalAddress) {
233         Out = N;
234         return true;
235       }
236 
237       return false;
238     }
239 
240     /// SelectAddrIdx - Given the specified addressed, check to see if it can be
241     /// represented as an indexed [r+r] operation.  Returns false if it can
242     /// be represented by [r+imm], which are preferred.
243     bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
244       return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
245     }
246 
247     /// SelectAddrIdxOnly - Given the specified addressed, force it to be
248     /// represented as an indexed [r+r] operation.
249     bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
250       return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
251     }
252 
253     /// SelectAddrImmX4 - Returns true if the address N can be represented by
254     /// a base register plus a signed 16-bit displacement that is a multiple of 4.
255     /// Suitable for use by STD and friends.
256     bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
257       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
258     }
259 
260     bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
261       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
262     }
263 
264     // Select an address into a single register.
265     bool SelectAddr(SDValue N, SDValue &Base) {
266       Base = N;
267       return true;
268     }
269 
270     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
271     /// inline asm expressions.  It is always correct to compute the value into
272     /// a register.  The case of adding a (possibly relocatable) constant to a
273     /// register can be improved, but it is wrong to substitute Reg+Reg for
274     /// Reg in an asm, because the load or store opcode would have to change.
275     bool SelectInlineAsmMemoryOperand(const SDValue &Op,
276                                       unsigned ConstraintID,
277                                       std::vector<SDValue> &OutOps) override {
278       switch(ConstraintID) {
279       default:
280         errs() << "ConstraintID: " << ConstraintID << "\n";
281         llvm_unreachable("Unexpected asm memory constraint");
282       case InlineAsm::Constraint_es:
283       case InlineAsm::Constraint_i:
284       case InlineAsm::Constraint_m:
285       case InlineAsm::Constraint_o:
286       case InlineAsm::Constraint_Q:
287       case InlineAsm::Constraint_Z:
288       case InlineAsm::Constraint_Zy:
289         // We need to make sure that this one operand does not end up in r0
290         // (because we might end up lowering this as 0(%op)).
291         const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
292         const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
293         SDLoc dl(Op);
294         SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
295         SDValue NewOp =
296           SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
297                                          dl, Op.getValueType(),
298                                          Op, RC), 0);
299 
300         OutOps.push_back(NewOp);
301         return false;
302       }
303       return true;
304     }
305 
306     void InsertVRSaveCode(MachineFunction &MF);
307 
308     StringRef getPassName() const override {
309       return "PowerPC DAG->DAG Pattern Instruction Selection";
310     }
311 
312 // Include the pieces autogenerated from the target description.
313 #include "PPCGenDAGISel.inc"
314 
315 private:
316     bool trySETCC(SDNode *N);
317 
318     void PeepholePPC64();
319     void PeepholePPC64ZExt();
320     void PeepholeCROps();
321 
322     SDValue combineToCMPB(SDNode *N);
323     void foldBoolExts(SDValue &Res, SDNode *&N);
324 
325     bool AllUsersSelectZero(SDNode *N);
326     void SwapAllSelectUsers(SDNode *N);
327 
328     bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
329     void transferMemOperands(SDNode *N, SDNode *Result);
330   };
331 
332 } // end anonymous namespace
333 
334 /// InsertVRSaveCode - Once the entire function has been instruction selected,
335 /// all virtual registers are created and all machine instructions are built,
336 /// check to see if we need to save/restore VRSAVE.  If so, do it.
337 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
338   // Check to see if this function uses vector registers, which means we have to
339   // save and restore the VRSAVE register and update it with the regs we use.
340   //
341   // In this case, there will be virtual registers of vector type created
342   // by the scheduler.  Detect them now.
343   bool HasVectorVReg = false;
344   for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
345     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
346     if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
347       HasVectorVReg = true;
348       break;
349     }
350   }
351   if (!HasVectorVReg) return;  // nothing to do.
352 
353   // If we have a vector register, we want to emit code into the entry and exit
354   // blocks to save and restore the VRSAVE register.  We do this here (instead
355   // of marking all vector instructions as clobbering VRSAVE) for two reasons:
356   //
357   // 1. This (trivially) reduces the load on the register allocator, by not
358   //    having to represent the live range of the VRSAVE register.
359   // 2. This (more significantly) allows us to create a temporary virtual
360   //    register to hold the saved VRSAVE value, allowing this temporary to be
361   //    register allocated, instead of forcing it to be spilled to the stack.
362 
363   // Create two vregs - one to hold the VRSAVE register that is live-in to the
364   // function and one for the value after having bits or'd into it.
365   unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
366   unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
367 
368   const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
369   MachineBasicBlock &EntryBB = *Fn.begin();
370   DebugLoc dl;
371   // Emit the following code into the entry block:
372   // InVRSAVE = MFVRSAVE
373   // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
374   // MTVRSAVE UpdatedVRSAVE
375   MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
376   BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
377   BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
378           UpdatedVRSAVE).addReg(InVRSAVE);
379   BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
380 
381   // Find all return blocks, outputting a restore in each epilog.
382   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
383     if (BB->isReturnBlock()) {
384       IP = BB->end(); --IP;
385 
386       // Skip over all terminator instructions, which are part of the return
387       // sequence.
388       MachineBasicBlock::iterator I2 = IP;
389       while (I2 != BB->begin() && (--I2)->isTerminator())
390         IP = I2;
391 
392       // Emit: MTVRSAVE InVRSave
393       BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
394     }
395   }
396 }
397 
398 /// getGlobalBaseReg - Output the instructions required to put the
399 /// base address to use for accessing globals into a register.
400 ///
401 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
402   if (!GlobalBaseReg) {
403     const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
404     // Insert the set of GlobalBaseReg into the first MBB of the function
405     MachineBasicBlock &FirstMBB = MF->front();
406     MachineBasicBlock::iterator MBBI = FirstMBB.begin();
407     const Module *M = MF->getFunction().getParent();
408     DebugLoc dl;
409 
410     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
411       if (PPCSubTarget->isTargetELF()) {
412         GlobalBaseReg = PPC::R30;
413         if (M->getPICLevel() == PICLevel::SmallPIC) {
414           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
415           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
416           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
417         } else {
418           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
419           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
420           unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
421           BuildMI(FirstMBB, MBBI, dl,
422                   TII.get(PPC::UpdateGBR), GlobalBaseReg)
423                   .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
424           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
425         }
426       } else {
427         GlobalBaseReg =
428           RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
429         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
430         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
431       }
432     } else {
433       // We must ensure that this sequence is dominated by the prologue.
434       // FIXME: This is a bit of a big hammer since we don't get the benefits
435       // of shrink-wrapping whenever we emit this instruction. Considering
436       // this is used in any function where we emit a jump table, this may be
437       // a significant limitation. We should consider inserting this in the
438       // block where it is used and then commoning this sequence up if it
439       // appears in multiple places.
440       // Note: on ISA 3.0 cores, we can use lnia (addpcis) insteand of
441       // MovePCtoLR8.
442       MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
443       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
444       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
445       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
446     }
447   }
448   return CurDAG->getRegister(GlobalBaseReg,
449                              PPCLowering->getPointerTy(CurDAG->getDataLayout()))
450       .getNode();
451 }
452 
453 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
454 /// operand. If so Imm will receive the 32-bit value.
455 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
456   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
457     Imm = cast<ConstantSDNode>(N)->getZExtValue();
458     return true;
459   }
460   return false;
461 }
462 
463 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
464 /// operand.  If so Imm will receive the 64-bit value.
465 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
466   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
467     Imm = cast<ConstantSDNode>(N)->getZExtValue();
468     return true;
469   }
470   return false;
471 }
472 
473 // isInt32Immediate - This method tests to see if a constant operand.
474 // If so Imm will receive the 32 bit value.
475 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
476   return isInt32Immediate(N.getNode(), Imm);
477 }
478 
479 /// isInt64Immediate - This method tests to see if the value is a 64-bit
480 /// constant operand. If so Imm will receive the 64-bit value.
481 static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
482   return isInt64Immediate(N.getNode(), Imm);
483 }
484 
485 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
486                               const SDValue &DestMBB) {
487   assert(isa<BasicBlockSDNode>(DestMBB));
488 
489   if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
490 
491   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
492   const TerminatorInst *BBTerm = BB->getTerminator();
493 
494   if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
495 
496   const BasicBlock *TBB = BBTerm->getSuccessor(0);
497   const BasicBlock *FBB = BBTerm->getSuccessor(1);
498 
499   auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
500   auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
501 
502   // We only want to handle cases which are easy to predict at static time, e.g.
503   // C++ throw statement, that is very likely not taken, or calling never
504   // returned function, e.g. stdlib exit(). So we set Threshold to filter
505   // unwanted cases.
506   //
507   // Below is LLVM branch weight table, we only want to handle case 1, 2
508   //
509   // Case                  Taken:Nontaken  Example
510   // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
511   // 2. Invoke-terminating 1:1048575
512   // 3. Coldblock          4:64            __builtin_expect
513   // 4. Loop Branch        124:4           For loop
514   // 5. PH/ZH/FPH          20:12
515   const uint32_t Threshold = 10000;
516 
517   if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
518     return PPC::BR_NO_HINT;
519 
520   DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
521                << BB->getName() << "'\n"
522                << " -> " << TBB->getName() << ": " << TProb << "\n"
523                << " -> " << FBB->getName() << ": " << FProb << "\n");
524 
525   const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
526 
527   // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
528   // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
529   if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
530     std::swap(TProb, FProb);
531 
532   return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
533 }
534 
535 // isOpcWithIntImmediate - This method tests to see if the node is a specific
536 // opcode and that it has a immediate integer right operand.
537 // If so Imm will receive the 32 bit value.
538 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
539   return N->getOpcode() == Opc
540          && isInt32Immediate(N->getOperand(1).getNode(), Imm);
541 }
542 
543 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
544   SDLoc dl(SN);
545   int FI = cast<FrameIndexSDNode>(N)->getIndex();
546   SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
547   unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
548   if (SN->hasOneUse())
549     CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
550                          getSmallIPtrImm(Offset, dl));
551   else
552     ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
553                                            getSmallIPtrImm(Offset, dl)));
554 }
555 
556 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
557                                       bool isShiftMask, unsigned &SH,
558                                       unsigned &MB, unsigned &ME) {
559   // Don't even go down this path for i64, since different logic will be
560   // necessary for rldicl/rldicr/rldimi.
561   if (N->getValueType(0) != MVT::i32)
562     return false;
563 
564   unsigned Shift  = 32;
565   unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
566   unsigned Opcode = N->getOpcode();
567   if (N->getNumOperands() != 2 ||
568       !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
569     return false;
570 
571   if (Opcode == ISD::SHL) {
572     // apply shift left to mask if it comes first
573     if (isShiftMask) Mask = Mask << Shift;
574     // determine which bits are made indeterminant by shift
575     Indeterminant = ~(0xFFFFFFFFu << Shift);
576   } else if (Opcode == ISD::SRL) {
577     // apply shift right to mask if it comes first
578     if (isShiftMask) Mask = Mask >> Shift;
579     // determine which bits are made indeterminant by shift
580     Indeterminant = ~(0xFFFFFFFFu >> Shift);
581     // adjust for the left rotate
582     Shift = 32 - Shift;
583   } else if (Opcode == ISD::ROTL) {
584     Indeterminant = 0;
585   } else {
586     return false;
587   }
588 
589   // if the mask doesn't intersect any Indeterminant bits
590   if (Mask && !(Mask & Indeterminant)) {
591     SH = Shift & 31;
592     // make sure the mask is still a mask (wrap arounds may not be)
593     return isRunOfOnes(Mask, MB, ME);
594   }
595   return false;
596 }
597 
598 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
599   SDValue Base = ST->getBasePtr();
600   if (Base.getOpcode() != PPCISD::ADD_TLS)
601     return false;
602   SDValue Offset = ST->getOffset();
603   if (!Offset.isUndef())
604     return false;
605 
606   SDLoc dl(ST);
607   EVT MemVT = ST->getMemoryVT();
608   EVT RegVT = ST->getValue().getValueType();
609 
610   unsigned Opcode;
611   switch (MemVT.getSimpleVT().SimpleTy) {
612     default:
613       return false;
614     case MVT::i8: {
615       Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
616       break;
617     }
618     case MVT::i16: {
619       Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
620       break;
621     }
622     case MVT::i32: {
623       Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
624       break;
625     }
626     case MVT::i64: {
627       Opcode = PPC::STDXTLS;
628       break;
629     }
630   }
631   SDValue Chain = ST->getChain();
632   SDVTList VTs = ST->getVTList();
633   SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
634                    Chain};
635   SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
636   transferMemOperands(ST, MN);
637   ReplaceNode(ST, MN);
638   return true;
639 }
640 
641 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
642   SDValue Base = LD->getBasePtr();
643   if (Base.getOpcode() != PPCISD::ADD_TLS)
644     return false;
645   SDValue Offset = LD->getOffset();
646   if (!Offset.isUndef())
647     return false;
648 
649   SDLoc dl(LD);
650   EVT MemVT = LD->getMemoryVT();
651   EVT RegVT = LD->getValueType(0);
652   unsigned Opcode;
653   switch (MemVT.getSimpleVT().SimpleTy) {
654     default:
655       return false;
656     case MVT::i8: {
657       Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
658       break;
659     }
660     case MVT::i16: {
661       Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
662       break;
663     }
664     case MVT::i32: {
665       Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
666       break;
667     }
668     case MVT::i64: {
669       Opcode = PPC::LDXTLS;
670       break;
671     }
672   }
673   SDValue Chain = LD->getChain();
674   SDVTList VTs = LD->getVTList();
675   SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
676   SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
677   transferMemOperands(LD, MN);
678   ReplaceNode(LD, MN);
679   return true;
680 }
681 
682 /// Turn an or of two masked values into the rotate left word immediate then
683 /// mask insert (rlwimi) instruction.
684 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
685   SDValue Op0 = N->getOperand(0);
686   SDValue Op1 = N->getOperand(1);
687   SDLoc dl(N);
688 
689   KnownBits LKnown, RKnown;
690   CurDAG->computeKnownBits(Op0, LKnown);
691   CurDAG->computeKnownBits(Op1, RKnown);
692 
693   unsigned TargetMask = LKnown.Zero.getZExtValue();
694   unsigned InsertMask = RKnown.Zero.getZExtValue();
695 
696   if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
697     unsigned Op0Opc = Op0.getOpcode();
698     unsigned Op1Opc = Op1.getOpcode();
699     unsigned Value, SH = 0;
700     TargetMask = ~TargetMask;
701     InsertMask = ~InsertMask;
702 
703     // If the LHS has a foldable shift and the RHS does not, then swap it to the
704     // RHS so that we can fold the shift into the insert.
705     if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
706       if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
707           Op0.getOperand(0).getOpcode() == ISD::SRL) {
708         if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
709             Op1.getOperand(0).getOpcode() != ISD::SRL) {
710           std::swap(Op0, Op1);
711           std::swap(Op0Opc, Op1Opc);
712           std::swap(TargetMask, InsertMask);
713         }
714       }
715     } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
716       if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
717           Op1.getOperand(0).getOpcode() != ISD::SRL) {
718         std::swap(Op0, Op1);
719         std::swap(Op0Opc, Op1Opc);
720         std::swap(TargetMask, InsertMask);
721       }
722     }
723 
724     unsigned MB, ME;
725     if (isRunOfOnes(InsertMask, MB, ME)) {
726       if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
727           isInt32Immediate(Op1.getOperand(1), Value)) {
728         Op1 = Op1.getOperand(0);
729         SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
730       }
731       if (Op1Opc == ISD::AND) {
732        // The AND mask might not be a constant, and we need to make sure that
733        // if we're going to fold the masking with the insert, all bits not
734        // know to be zero in the mask are known to be one.
735         KnownBits MKnown;
736         CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
737         bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
738 
739         unsigned SHOpc = Op1.getOperand(0).getOpcode();
740         if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
741             isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
742           // Note that Value must be in range here (less than 32) because
743           // otherwise there would not be any bits set in InsertMask.
744           Op1 = Op1.getOperand(0).getOperand(0);
745           SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
746         }
747       }
748 
749       SH &= 31;
750       SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
751                           getI32Imm(ME, dl) };
752       ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
753       return true;
754     }
755   }
756   return false;
757 }
758 
759 // Predict the number of instructions that would be generated by calling
760 // selectI64Imm(N).
761 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
762   // Assume no remaining bits.
763   unsigned Remainder = 0;
764   // Assume no shift required.
765   unsigned Shift = 0;
766 
767   // If it can't be represented as a 32 bit value.
768   if (!isInt<32>(Imm)) {
769     Shift = countTrailingZeros<uint64_t>(Imm);
770     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
771 
772     // If the shifted value fits 32 bits.
773     if (isInt<32>(ImmSh)) {
774       // Go with the shifted value.
775       Imm = ImmSh;
776     } else {
777       // Still stuck with a 64 bit value.
778       Remainder = Imm;
779       Shift = 32;
780       Imm >>= 32;
781     }
782   }
783 
784   // Intermediate operand.
785   unsigned Result = 0;
786 
787   // Handle first 32 bits.
788   unsigned Lo = Imm & 0xFFFF;
789 
790   // Simple value.
791   if (isInt<16>(Imm)) {
792     // Just the Lo bits.
793     ++Result;
794   } else if (Lo) {
795     // Handle the Hi bits and Lo bits.
796     Result += 2;
797   } else {
798     // Just the Hi bits.
799     ++Result;
800   }
801 
802   // If no shift, we're done.
803   if (!Shift) return Result;
804 
805   // If Hi word == Lo word,
806   // we can use rldimi to insert the Lo word into Hi word.
807   if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
808     ++Result;
809     return Result;
810   }
811 
812   // Shift for next step if the upper 32-bits were not zero.
813   if (Imm)
814     ++Result;
815 
816   // Add in the last bits as required.
817   if ((Remainder >> 16) & 0xFFFF)
818     ++Result;
819   if (Remainder & 0xFFFF)
820     ++Result;
821 
822   return Result;
823 }
824 
// Rotate the 64-bit value Imm left by R bit positions and return the result.
//
// R is reduced modulo 64 first. Without that, R == 0 would evaluate
// Imm >> 64 and R >= 64 would evaluate Imm << R; shifting a 64-bit operand
// by 64 or more is undefined behavior in C++. For R in [1, 63] the result is
// the same as the plain two-shift formula.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
828 
829 static unsigned selectI64ImmInstrCount(int64_t Imm) {
830   unsigned Count = selectI64ImmInstrCountDirect(Imm);
831 
832   // If the instruction count is 1 or 2, we do not need further analysis
833   // since rotate + load constant requires at least 2 instructions.
834   if (Count <= 2)
835     return Count;
836 
837   for (unsigned r = 1; r < 63; ++r) {
838     uint64_t RImm = Rot64(Imm, r);
839     unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
840     Count = std::min(Count, RCount);
841 
842     // See comments in selectI64Imm for an explanation of the logic below.
843     unsigned LS = findLastSet(RImm);
844     if (LS != r-1)
845       continue;
846 
847     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
848     uint64_t RImmWithOnes = RImm | OnesMask;
849 
850     RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
851     Count = std::min(Count, RCount);
852   }
853 
854   return Count;
855 }
856 
857 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
858 // (above) needs to be kept in sync with this function.
859 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
860                                   int64_t Imm) {
861   // Assume no remaining bits.
862   unsigned Remainder = 0;
863   // Assume no shift required.
864   unsigned Shift = 0;
865 
866   // If it can't be represented as a 32 bit value.
867   if (!isInt<32>(Imm)) {
868     Shift = countTrailingZeros<uint64_t>(Imm);
869     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
870 
871     // If the shifted value fits 32 bits.
872     if (isInt<32>(ImmSh)) {
873       // Go with the shifted value.
874       Imm = ImmSh;
875     } else {
876       // Still stuck with a 64 bit value.
877       Remainder = Imm;
878       Shift = 32;
879       Imm >>= 32;
880     }
881   }
882 
883   // Intermediate operand.
884   SDNode *Result;
885 
886   // Handle first 32 bits.
887   unsigned Lo = Imm & 0xFFFF;
888   unsigned Hi = (Imm >> 16) & 0xFFFF;
889 
890   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
891       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
892   };
893 
894   // Simple value.
895   if (isInt<16>(Imm)) {
896     uint64_t SextImm = SignExtend64(Lo, 16);
897     SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
898     // Just the Lo bits.
899     Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
900   } else if (Lo) {
901     // Handle the Hi bits.
902     unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
903     Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
904     // And Lo bits.
905     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
906                                     SDValue(Result, 0), getI32Imm(Lo));
907   } else {
908     // Just the Hi bits.
909     Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
910   }
911 
912   // If no shift, we're done.
913   if (!Shift) return Result;
914 
915   // If Hi word == Lo word,
916   // we can use rldimi to insert the Lo word into Hi word.
917   if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
918     SDValue Ops[] =
919       { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
920     return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
921   }
922 
923   // Shift for next step if the upper 32-bits were not zero.
924   if (Imm) {
925     Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
926                                     SDValue(Result, 0),
927                                     getI32Imm(Shift),
928                                     getI32Imm(63 - Shift));
929   }
930 
931   // Add in the last bits as required.
932   if ((Hi = (Remainder >> 16) & 0xFFFF)) {
933     Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
934                                     SDValue(Result, 0), getI32Imm(Hi));
935   }
936   if ((Lo = Remainder & 0xFFFF)) {
937     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
938                                     SDValue(Result, 0), getI32Imm(Lo));
939   }
940 
941   return Result;
942 }
943 
944 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
945                             int64_t Imm) {
946   unsigned Count = selectI64ImmInstrCountDirect(Imm);
947 
948   // If the instruction count is 1 or 2, we do not need further analysis
949   // since rotate + load constant requires at least 2 instructions.
950   if (Count <= 2)
951     return selectI64ImmDirect(CurDAG, dl, Imm);
952 
953   unsigned RMin = 0;
954 
955   int64_t MatImm;
956   unsigned MaskEnd;
957 
958   for (unsigned r = 1; r < 63; ++r) {
959     uint64_t RImm = Rot64(Imm, r);
960     unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
961     if (RCount < Count) {
962       Count = RCount;
963       RMin = r;
964       MatImm = RImm;
965       MaskEnd = 63;
966     }
967 
968     // If the immediate to generate has many trailing zeros, it might be
969     // worthwhile to generate a rotated value with too many leading ones
970     // (because that's free with li/lis's sign-extension semantics), and then
971     // mask them off after rotation.
972 
973     unsigned LS = findLastSet(RImm);
974     // We're adding (63-LS) higher-order ones, and we expect to mask them off
975     // after performing the inverse rotation by (64-r). So we need that:
976     //   63-LS == 64-r => LS == r-1
977     if (LS != r-1)
978       continue;
979 
980     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
981     uint64_t RImmWithOnes = RImm | OnesMask;
982 
983     RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
984     if (RCount < Count) {
985       Count = RCount;
986       RMin = r;
987       MatImm = RImmWithOnes;
988       MaskEnd = LS;
989     }
990   }
991 
992   if (!RMin)
993     return selectI64ImmDirect(CurDAG, dl, Imm);
994 
995   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
996       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
997   };
998 
999   SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
1000   return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
1001                                 getI32Imm(64 - RMin), getI32Imm(MaskEnd));
1002 }
1003 
1004 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
1005   unsigned MaxTruncation = 0;
1006   // Cannot use range-based for loop here as we need the actual use (i.e. we
1007   // need the operand number corresponding to the use). A range-based for
1008   // will unbox the use and provide an SDNode*.
1009   for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
1010        Use != UseEnd; ++Use) {
1011     unsigned Opc =
1012       Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
1013     switch (Opc) {
1014     default: return 0;
1015     case ISD::TRUNCATE:
1016       if (Use->isMachineOpcode())
1017         return 0;
1018       MaxTruncation =
1019         std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
1020       continue;
1021     case ISD::STORE: {
1022       if (Use->isMachineOpcode())
1023         return 0;
1024       StoreSDNode *STN = cast<StoreSDNode>(*Use);
1025       unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
1026       if (MemVTSize == 64 || Use.getOperandNo() != 0)
1027         return 0;
1028       MaxTruncation = std::max(MaxTruncation, MemVTSize);
1029       continue;
1030     }
1031     case PPC::STW8:
1032     case PPC::STWX8:
1033     case PPC::STWU8:
1034     case PPC::STWUX8:
1035       if (Use.getOperandNo() != 0)
1036         return 0;
1037       MaxTruncation = std::max(MaxTruncation, 32u);
1038       continue;
1039     case PPC::STH8:
1040     case PPC::STHX8:
1041     case PPC::STHU8:
1042     case PPC::STHUX8:
1043       if (Use.getOperandNo() != 0)
1044         return 0;
1045       MaxTruncation = std::max(MaxTruncation, 16u);
1046       continue;
1047     case PPC::STB8:
1048     case PPC::STBX8:
1049     case PPC::STBU8:
1050     case PPC::STBUX8:
1051       if (Use.getOperandNo() != 0)
1052         return 0;
1053       MaxTruncation = std::max(MaxTruncation, 8u);
1054       continue;
1055     }
1056   }
1057   return MaxTruncation;
1058 }
1059 
1060 // Select a 64-bit constant.
1061 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1062   SDLoc dl(N);
1063 
1064   // Get 64 bit value.
1065   int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1066   if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1067     uint64_t SextImm = SignExtend64(Imm, MinSize);
1068     SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1069     if (isInt<16>(SextImm))
1070       return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1071   }
1072   return selectI64Imm(CurDAG, dl, Imm);
1073 }
1074 
1075 namespace {
1076 
1077 class BitPermutationSelector {
1078   struct ValueBit {
1079     SDValue V;
1080 
1081     // The bit number in the value, using a convention where bit 0 is the
1082     // lowest-order bit.
1083     unsigned Idx;
1084 
1085     enum Kind {
1086       ConstZero,
1087       Variable
1088     } K;
1089 
1090     ValueBit(SDValue V, unsigned I, Kind K = Variable)
1091       : V(V), Idx(I), K(K) {}
1092     ValueBit(Kind K = Variable)
1093       : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
1094 
1095     bool isZero() const {
1096       return K == ConstZero;
1097     }
1098 
1099     bool hasValue() const {
1100       return K == Variable;
1101     }
1102 
1103     SDValue getValue() const {
1104       assert(hasValue() && "Cannot get the value of a constant bit");
1105       return V;
1106     }
1107 
1108     unsigned getValueBitIndex() const {
1109       assert(hasValue() && "Cannot get the value bit index of a constant bit");
1110       return Idx;
1111     }
1112   };
1113 
1114   // A bit group has the same underlying value and the same rotate factor.
1115   struct BitGroup {
1116     SDValue V;
1117     unsigned RLAmt;
1118     unsigned StartIdx, EndIdx;
1119 
1120     // This rotation amount assumes that the lower 32 bits of the quantity are
1121     // replicated in the high 32 bits by the rotation operator (which is done
1122     // by rlwinm and friends in 64-bit mode).
1123     bool Repl32;
1124     // Did converting to Repl32 == true change the rotation factor? If it did,
1125     // it decreased it by 32.
1126     bool Repl32CR;
1127     // Was this group coalesced after setting Repl32 to true?
1128     bool Repl32Coalesced;
1129 
1130     BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1131       : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1132         Repl32Coalesced(false) {
1133       DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
1134                       " [" << S << ", " << E << "]\n");
1135     }
1136   };
1137 
1138   // Information on each (Value, RLAmt) pair (like the number of groups
1139   // associated with each) used to choose the lowering method.
1140   struct ValueRotInfo {
1141     SDValue V;
1142     unsigned RLAmt = std::numeric_limits<unsigned>::max();
1143     unsigned NumGroups = 0;
1144     unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1145     bool Repl32 = false;
1146 
1147     ValueRotInfo() = default;
1148 
1149     // For sorting (in reverse order) by NumGroups, and then by
1150     // FirstGroupStartIdx.
1151     bool operator < (const ValueRotInfo &Other) const {
1152       // We need to sort so that the non-Repl32 come first because, when we're
1153       // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1154       // masking operation.
1155       if (Repl32 < Other.Repl32)
1156         return true;
1157       else if (Repl32 > Other.Repl32)
1158         return false;
1159       else if (NumGroups > Other.NumGroups)
1160         return true;
1161       else if (NumGroups < Other.NumGroups)
1162         return false;
1163       else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1164         return true;
1165       return false;
1166     }
1167   };
1168 
1169   using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1170   using ValueBitsMemoizer =
1171       DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1172   ValueBitsMemoizer Memoizer;
1173 
  // Compute, and memoize per SDValue, a bit-by-bit description of V treated
  // as a NumBits-wide quantity: each entry says either "constant zero" or
  // "bit Idx of some underlying value".
  //
  // Returns a pair of a bool and a pointer to the memoized SmallVector.
  // The bool is true if something interesting was deduced; it is false if
  // we're providing only a generic representation of V (or something else
  // likewise uninteresting for instruction selection) through the SmallVector.
  //
  // Note that on a memoization hit the stored vector is returned as is and
  // NumBits is not consulted.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    ValueEntry.reset(new ValueBitsMemoizedValue());
    // From here on only the heap-allocated entry is referenced, never the map
    // slot itself: the recursive calls below insert into Memoizer, and a
    // DenseMap may relocate its slots when it grows.
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      // Rotate left by a constant: result bit i is operand bit
      // (i - RotAmt) modulo NumBits.
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
      // Shift left by a constant: the upper bits come from the operand moved
      // up by ShiftAmt, the low ShiftAmt bits are constant zero.
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
      // Logical shift right by a constant: the low bits come from the operand
      // moved down by ShiftAmt, the top ShiftAmt bits are constant zero.
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      // AND with a constant mask: bits selected by the mask pass through from
      // the operand, all others are constant zero.
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);

        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      // An OR can be described bit-by-bit only if, at every position, at
      // least one of the two operands is known to be zero.
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      bool AllDisjoint = true;
      for (unsigned i = 0; i < NumBits; ++i)
        if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          AllDisjoint = false;
          break;
        }

      // Overlapping operands: fall through to the generic representation
      // below, which overwrites any entries filled in so far.
      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      // The operand is analyzed as a 32-bit quantity; the result is
      // interesting only if the operand was.
      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
      }
    }

    // Generic representation: bit i of the result is simply bit i of V.
    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }
1298 
1299   // For each value (except the constant ones), compute the left-rotate amount
1300   // to get it from its original to final position.
1301   void computeRotationAmounts() {
1302     HasZeros = false;
1303     RLAmt.resize(Bits.size());
1304     for (unsigned i = 0; i < Bits.size(); ++i)
1305       if (Bits[i].hasValue()) {
1306         unsigned VBI = Bits[i].getValueBitIndex();
1307         if (i >= VBI)
1308           RLAmt[i] = i - VBI;
1309         else
1310           RLAmt[i] = Bits.size() - (VBI - i);
1311       } else if (Bits[i].isZero()) {
1312         HasZeros = true;
1313         RLAmt[i] = UINT32_MAX;
1314       } else {
1315         llvm_unreachable("Unknown value bit type");
1316       }
1317   }
1318 
  // Rebuild BitGroups from Bits and RLAmt: collect groups of consecutive bits
  // with the same underlying value and rotation factor. If we're doing late
  // masking (LateMask is true), we ignore zeros, otherwise they break up
  // groups. Runs of constant-zero bits never form a group of their own.
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    // State of the run currently being extended; a null LastValue means the
    // run so far consists of constant-zero bits.
    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        // With late masking a zero bit is treated as a continuation of the
        // current run.
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit has the same underlying value and the same rotate factor as
      // the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        continue;

      // The run ended at bit i-1; record it unless it was a run of zeros.
      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
    }
    // Record the final run, which extends to the top bit.
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group. The
      // surviving group then wraps around, i.e. has StartIdx > EndIdx.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }
1374 
1375   // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1376   // associated with each. If there is a degeneracy, pick the one that occurs
1377   // first (in the final value).
1378   void collectValueRotInfo() {
1379     ValueRots.clear();
1380 
1381     for (auto &BG : BitGroups) {
1382       unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1383       ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1384       VRI.V = BG.V;
1385       VRI.RLAmt = BG.RLAmt;
1386       VRI.Repl32 = BG.Repl32;
1387       VRI.NumGroups += 1;
1388       VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1389     }
1390 
1391     // Now that we've collected the various ValueRotInfo instances, we need to
1392     // sort them.
1393     ValueRotsVec.clear();
1394     for (auto &I : ValueRots) {
1395       ValueRotsVec.push_back(I.second);
1396     }
1397     llvm::sort(ValueRotsVec.begin(), ValueRotsVec.end());
1398   }
1399 
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible. The conversion is done in place on BitGroups:
  // qualifying groups get Repl32 set (and Repl32CR/Repl32Coalesced as
  // applicable), and groups that become mergeable are merged.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
    // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // True if every value bit covered by BG comes from the low 32 bits of its
    // source value. Constant-zero bits inside the range are ignored. A group
    // with StartIdx > EndIdx wraps around, so its range is checked in two
    // pieces.
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    // First pass: mark every group that lies entirely within the low 32
    // result bits and draws only on low-32 source bits as Repl32, reducing
    // its rotate amount by 32 when that amount was 32 or more.
    for (auto &BG : BitGroups) {
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
                          BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
                          " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      // Merge I into IP when both are Repl32 groups of the same value and
      // rotate amount and I starts right after IP ends (modulo 64).
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
                        I->V.getNode() << " RLAmt = " << I->RLAmt <<
                        " [" << I->StartIdx << ", " << I->EndIdx <<
                        "] with group with range [" <<
                        IP->StartIdx << ", " << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        // erase() returns the iterator to the following element, so the loop
        // continues from there without incrementing.
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          // The "next" group wraps around to the first one.
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            DEBUG(dbgs() << "\tcombining bit group for " <<
                            I->V.getNode() << " RLAmt = " << I->RLAmt <<
                            " [" << I->StartIdx << ", " << I->EndIdx <<
                            "] with 32-bit replicated groups with ranges [" <<
                            IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
                            IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }
1536 
  // Convenience wrapper: returns Imm as an i32 target constant created in
  // CurDAG at location dl.
  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }
1540 
1541   uint64_t getZerosMask() {
1542     uint64_t Mask = 0;
1543     for (unsigned i = 0; i < Bits.size(); ++i) {
1544       if (Bits[i].hasValue())
1545         continue;
1546       Mask |= (UINT64_C(1) << i);
1547     }
1548 
1549     return ~Mask;
1550   }
1551 
1552   // This method extends an input value to 64 bit if input is 32-bit integer.
1553   // While selecting instructions in BitPermutationSelector in 64-bit mode,
1554   // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1555   // In such case, we extend it to 64 bit to be consistent with other values.
1556   SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1557     if (V.getValueSizeInBits() == 64)
1558       return V;
1559 
1560     assert(V.getValueSizeInBits() == 32);
1561     SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1562     SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1563                                                    MVT::i64), 0);
1564     SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1565                                                     MVT::i64, ImDef, V,
1566                                                     SubRegIdx), 0);
1567     return ExtVal;
1568   }
1569 
1570   // Depending on the number of groups for a particular value, it might be
1571   // better to rotate, mask explicitly (using andi/andis), and then or the
1572   // result. Select this part of the result first.
1573   void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1574     if (BPermRewriterNoMasking)
1575       return;
1576 
1577     for (ValueRotInfo &VRI : ValueRotsVec) {
1578       unsigned Mask = 0;
1579       for (unsigned i = 0; i < Bits.size(); ++i) {
1580         if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1581           continue;
1582         if (RLAmt[i] != VRI.RLAmt)
1583           continue;
1584         Mask |= (1u << i);
1585       }
1586 
1587       // Compute the masks for andi/andis that would be necessary.
1588       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1589       assert((ANDIMask != 0 || ANDISMask != 0) &&
1590              "No set bits in mask for value bit groups");
1591       bool NeedsRotate = VRI.RLAmt != 0;
1592 
1593       // We're trying to minimize the number of instructions. If we have one
1594       // group, using one of andi/andis can break even.  If we have three
1595       // groups, we can use both andi and andis and break even (to use both
1596       // andi and andis we also need to or the results together). We need four
1597       // groups if we also need to rotate. To use andi/andis we need to do more
1598       // than break even because rotate-and-mask instructions tend to be easier
1599       // to schedule.
1600 
1601       // FIXME: We've biased here against using andi/andis, which is right for
1602       // POWER cores, but not optimal everywhere. For example, on the A2,
1603       // andi/andis have single-cycle latency whereas the rotate-and-mask
1604       // instructions take two cycles, and it would be better to bias toward
1605       // andi/andis in break-even cases.
1606 
1607       unsigned NumAndInsts = (unsigned) NeedsRotate +
1608                              (unsigned) (ANDIMask != 0) +
1609                              (unsigned) (ANDISMask != 0) +
1610                              (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1611                              (unsigned) (bool) Res;
1612 
1613       DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1614                       " RL: " << VRI.RLAmt << ":" <<
1615                       "\n\t\t\tisel using masking: " << NumAndInsts <<
1616                       " using rotates: " << VRI.NumGroups << "\n");
1617 
1618       if (NumAndInsts >= VRI.NumGroups)
1619         continue;
1620 
1621       DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1622 
1623       if (InstCnt) *InstCnt += NumAndInsts;
1624 
1625       SDValue VRot;
1626       if (VRI.RLAmt) {
1627         SDValue Ops[] =
1628           { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1629             getI32Imm(31, dl) };
1630         VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1631                                               Ops), 0);
1632       } else {
1633         VRot = VRI.V;
1634       }
1635 
1636       SDValue ANDIVal, ANDISVal;
1637       if (ANDIMask != 0)
1638         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1639                             VRot, getI32Imm(ANDIMask, dl)), 0);
1640       if (ANDISMask != 0)
1641         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1642                              VRot, getI32Imm(ANDISMask, dl)), 0);
1643 
1644       SDValue TotalVal;
1645       if (!ANDIVal)
1646         TotalVal = ANDISVal;
1647       else if (!ANDISVal)
1648         TotalVal = ANDIVal;
1649       else
1650         TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1651                              ANDIVal, ANDISVal), 0);
1652 
1653       if (!Res)
1654         Res = TotalVal;
1655       else
1656         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1657                         Res, TotalVal), 0);
1658 
1659       // Now, remove all groups with this underlying value and rotation
1660       // factor.
1661       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1662         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1663       });
1664     }
1665   }
1666 
1667   // Instruction selection for the 32-bit case.
1668   SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1669     SDLoc dl(N);
1670     SDValue Res;
1671 
1672     if (InstCnt) *InstCnt = 0;
1673 
1674     // Take care of cases that should use andi/andis first.
1675     SelectAndParts32(dl, Res, InstCnt);
1676 
1677     // If we've not yet selected a 'starting' instruction, and we have no zeros
1678     // to fill in, select the (Value, RLAmt) with the highest priority (largest
1679     // number of groups), and start with this rotated value.
1680     if ((!HasZeros || LateMask) && !Res) {
1681       ValueRotInfo &VRI = ValueRotsVec[0];
1682       if (VRI.RLAmt) {
1683         if (InstCnt) *InstCnt += 1;
1684         SDValue Ops[] =
1685           { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
1686             getI32Imm(31, dl) };
1687         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1688                       0);
1689       } else {
1690         Res = VRI.V;
1691       }
1692 
1693       // Now, remove all groups with this underlying value and rotation factor.
1694       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1695         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1696       });
1697     }
1698 
1699     if (InstCnt) *InstCnt += BitGroups.size();
1700 
1701     // Insert the other groups (one at a time).
1702     for (auto &BG : BitGroups) {
1703       if (!Res) {
1704         SDValue Ops[] =
1705           { BG.V, getI32Imm(BG.RLAmt, dl),
1706             getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1707             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1708         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1709       } else {
1710         SDValue Ops[] =
1711           { Res, BG.V, getI32Imm(BG.RLAmt, dl),
1712               getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1713             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1714         Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1715       }
1716     }
1717 
1718     if (LateMask) {
1719       unsigned Mask = (unsigned) getZerosMask();
1720 
1721       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1722       assert((ANDIMask != 0 || ANDISMask != 0) &&
1723              "No set bits in zeros mask?");
1724 
1725       if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1726                                (unsigned) (ANDISMask != 0) +
1727                                (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1728 
1729       SDValue ANDIVal, ANDISVal;
1730       if (ANDIMask != 0)
1731         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1732                             Res, getI32Imm(ANDIMask, dl)), 0);
1733       if (ANDISMask != 0)
1734         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1735                              Res, getI32Imm(ANDISMask, dl)), 0);
1736 
1737       if (!ANDIVal)
1738         Res = ANDISVal;
1739       else if (!ANDISVal)
1740         Res = ANDIVal;
1741       else
1742         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1743                         ANDIVal, ANDISVal), 0);
1744     }
1745 
1746     return Res.getNode();
1747   }
1748 
1749   unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
1750                                 unsigned MaskStart, unsigned MaskEnd,
1751                                 bool IsIns) {
1752     // In the notation used by the instructions, 'start' and 'end' are reversed
1753     // because bits are counted from high to low order.
1754     unsigned InstMaskStart = 64 - MaskEnd - 1,
1755              InstMaskEnd   = 64 - MaskStart - 1;
1756 
1757     if (Repl32)
1758       return 1;
1759 
1760     if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
1761         InstMaskEnd == 63 - RLAmt)
1762       return 1;
1763 
1764     return 2;
1765   }
1766 
1767   // For 64-bit values, not all combinations of rotates and masks are
1768   // available. Produce one if it is available.
1769   SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
1770                           bool Repl32, unsigned MaskStart, unsigned MaskEnd,
1771                           unsigned *InstCnt = nullptr) {
1772     // In the notation used by the instructions, 'start' and 'end' are reversed
1773     // because bits are counted from high to low order.
1774     unsigned InstMaskStart = 64 - MaskEnd - 1,
1775              InstMaskEnd   = 64 - MaskStart - 1;
1776 
1777     if (InstCnt) *InstCnt += 1;
1778 
1779     if (Repl32) {
1780       // This rotation amount assumes that the lower 32 bits of the quantity
1781       // are replicated in the high 32 bits by the rotation operator (which is
1782       // done by rlwinm and friends).
1783       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1784       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1785       SDValue Ops[] =
1786         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1787           getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1788       return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1789                                             Ops), 0);
1790     }
1791 
1792     if (InstMaskEnd == 63) {
1793       SDValue Ops[] =
1794         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1795           getI32Imm(InstMaskStart, dl) };
1796       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1797     }
1798 
1799     if (InstMaskStart == 0) {
1800       SDValue Ops[] =
1801         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1802           getI32Imm(InstMaskEnd, dl) };
1803       return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1804     }
1805 
1806     if (InstMaskEnd == 63 - RLAmt) {
1807       SDValue Ops[] =
1808         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1809           getI32Imm(InstMaskStart, dl) };
1810       return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1811     }
1812 
1813     // We cannot do this with a single instruction, so we'll use two. The
1814     // problem is that we're not free to choose both a rotation amount and mask
1815     // start and end independently. We can choose an arbitrary mask start and
1816     // end, but then the rotation amount is fixed. Rotation, however, can be
1817     // inverted, and so by applying an "inverse" rotation first, we can get the
1818     // desired result.
1819     if (InstCnt) *InstCnt += 1;
1820 
1821     // The rotation mask for the second instruction must be MaskStart.
1822     unsigned RLAmt2 = MaskStart;
1823     // The first instruction must rotate V so that the overall rotation amount
1824     // is RLAmt.
1825     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1826     if (RLAmt1)
1827       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1828     return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
1829   }
1830 
1831   // For 64-bit values, not all combinations of rotates and masks are
1832   // available. Produce a rotate-mask-and-insert if one is available.
1833   SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
1834                              unsigned RLAmt, bool Repl32, unsigned MaskStart,
1835                              unsigned MaskEnd, unsigned *InstCnt = nullptr) {
1836     // In the notation used by the instructions, 'start' and 'end' are reversed
1837     // because bits are counted from high to low order.
1838     unsigned InstMaskStart = 64 - MaskEnd - 1,
1839              InstMaskEnd   = 64 - MaskStart - 1;
1840 
1841     if (InstCnt) *InstCnt += 1;
1842 
1843     if (Repl32) {
1844       // This rotation amount assumes that the lower 32 bits of the quantity
1845       // are replicated in the high 32 bits by the rotation operator (which is
1846       // done by rlwinm and friends).
1847       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1848       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1849       SDValue Ops[] =
1850         { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1851           getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1852       return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
1853                                             Ops), 0);
1854     }
1855 
1856     if (InstMaskEnd == 63 - RLAmt) {
1857       SDValue Ops[] =
1858         { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1859           getI32Imm(InstMaskStart, dl) };
1860       return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
1861     }
1862 
1863     // We cannot do this with a single instruction, so we'll use two. The
1864     // problem is that we're not free to choose both a rotation amount and mask
1865     // start and end independently. We can choose an arbitrary mask start and
1866     // end, but then the rotation amount is fixed. Rotation, however, can be
1867     // inverted, and so by applying an "inverse" rotation first, we can get the
1868     // desired result.
1869     if (InstCnt) *InstCnt += 1;
1870 
1871     // The rotation mask for the second instruction must be MaskStart.
1872     unsigned RLAmt2 = MaskStart;
1873     // The first instruction must rotate V so that the overall rotation amount
1874     // is RLAmt.
1875     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
1876     if (RLAmt1)
1877       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
1878     return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
1879   }
1880 
1881   void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1882     if (BPermRewriterNoMasking)
1883       return;
1884 
1885     // The idea here is the same as in the 32-bit version, but with additional
1886     // complications from the fact that Repl32 might be true. Because we
1887     // aggressively convert bit groups to Repl32 form (which, for small
1888     // rotation factors, involves no other change), and then coalesce, it might
1889     // be the case that a single 64-bit masking operation could handle both
1890     // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
1891     // form allowed coalescing, then we must use a 32-bit rotaton in order to
1892     // completely capture the new combined bit group.
1893 
1894     for (ValueRotInfo &VRI : ValueRotsVec) {
1895       uint64_t Mask = 0;
1896 
1897       // We need to add to the mask all bits from the associated bit groups.
1898       // If Repl32 is false, we need to add bits from bit groups that have
1899       // Repl32 true, but are trivially convertable to Repl32 false. Such a
1900       // group is trivially convertable if it overlaps only with the lower 32
1901       // bits, and the group has not been coalesced.
1902       auto MatchingBG = [VRI](const BitGroup &BG) {
1903         if (VRI.V != BG.V)
1904           return false;
1905 
1906         unsigned EffRLAmt = BG.RLAmt;
1907         if (!VRI.Repl32 && BG.Repl32) {
1908           if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
1909               !BG.Repl32Coalesced) {
1910             if (BG.Repl32CR)
1911               EffRLAmt += 32;
1912           } else {
1913             return false;
1914           }
1915         } else if (VRI.Repl32 != BG.Repl32) {
1916           return false;
1917         }
1918 
1919         return VRI.RLAmt == EffRLAmt;
1920       };
1921 
1922       for (auto &BG : BitGroups) {
1923         if (!MatchingBG(BG))
1924           continue;
1925 
1926         if (BG.StartIdx <= BG.EndIdx) {
1927           for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
1928             Mask |= (UINT64_C(1) << i);
1929         } else {
1930           for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
1931             Mask |= (UINT64_C(1) << i);
1932           for (unsigned i = 0; i <= BG.EndIdx; ++i)
1933             Mask |= (UINT64_C(1) << i);
1934         }
1935       }
1936 
1937       // We can use the 32-bit andi/andis technique if the mask does not
1938       // require any higher-order bits. This can save an instruction compared
1939       // to always using the general 64-bit technique.
1940       bool Use32BitInsts = isUInt<32>(Mask);
1941       // Compute the masks for andi/andis that would be necessary.
1942       unsigned ANDIMask = (Mask & UINT16_MAX),
1943                ANDISMask = (Mask >> 16) & UINT16_MAX;
1944 
1945       bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
1946 
1947       unsigned NumAndInsts = (unsigned) NeedsRotate +
1948                              (unsigned) (bool) Res;
1949       if (Use32BitInsts)
1950         NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
1951                        (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1952       else
1953         NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;
1954 
1955       unsigned NumRLInsts = 0;
1956       bool FirstBG = true;
1957       bool MoreBG = false;
1958       for (auto &BG : BitGroups) {
1959         if (!MatchingBG(BG)) {
1960           MoreBG = true;
1961           continue;
1962         }
1963         NumRLInsts +=
1964           SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
1965                                !FirstBG);
1966         FirstBG = false;
1967       }
1968 
1969       DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
1970                       " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") <<
1971                       "\n\t\t\tisel using masking: " << NumAndInsts <<
1972                       " using rotates: " << NumRLInsts << "\n");
1973 
1974       // When we'd use andi/andis, we bias toward using the rotates (andi only
1975       // has a record form, and is cracked on POWER cores). However, when using
1976       // general 64-bit constant formation, bias toward the constant form,
1977       // because that exposes more opportunities for CSE.
1978       if (NumAndInsts > NumRLInsts)
1979         continue;
1980       // When merging multiple bit groups, instruction or is used.
1981       // But when rotate is used, rldimi can inert the rotated value into any
1982       // register, so instruction or can be avoided.
1983       if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
1984         continue;
1985 
1986       DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1987 
1988       if (InstCnt) *InstCnt += NumAndInsts;
1989 
1990       SDValue VRot;
1991       // We actually need to generate a rotation if we have a non-zero rotation
1992       // factor or, in the Repl32 case, if we care about any of the
1993       // higher-order replicated bits. In the latter case, we generate a mask
1994       // backward so that it actually includes the entire 64 bits.
1995       if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
1996         VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
1997                                VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
1998       else
1999         VRot = VRI.V;
2000 
2001       SDValue TotalVal;
2002       if (Use32BitInsts) {
2003         assert((ANDIMask != 0 || ANDISMask != 0) &&
2004                "No set bits in mask when using 32-bit ands for 64-bit value");
2005 
2006         SDValue ANDIVal, ANDISVal;
2007         if (ANDIMask != 0)
2008           ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
2009                                                    ExtendToInt64(VRot, dl),
2010                                                    getI32Imm(ANDIMask, dl)),
2011                             0);
2012         if (ANDISMask != 0)
2013           ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
2014                                                     ExtendToInt64(VRot, dl),
2015                                                     getI32Imm(ANDISMask, dl)),
2016                              0);
2017 
2018         if (!ANDIVal)
2019           TotalVal = ANDISVal;
2020         else if (!ANDISVal)
2021           TotalVal = ANDIVal;
2022         else
2023           TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2024                                ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2025       } else {
2026         TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2027         TotalVal =
2028           SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2029                                          ExtendToInt64(VRot, dl), TotalVal),
2030                   0);
2031      }
2032 
2033       if (!Res)
2034         Res = TotalVal;
2035       else
2036         Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2037                                              ExtendToInt64(Res, dl), TotalVal),
2038                       0);
2039 
2040       // Now, remove all groups with this underlying value and rotation
2041       // factor.
2042       eraseMatchingBitGroups(MatchingBG);
2043     }
2044   }
2045 
2046   // Instruction selection for the 64-bit case.
2047   SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2048     SDLoc dl(N);
2049     SDValue Res;
2050 
2051     if (InstCnt) *InstCnt = 0;
2052 
2053     // Take care of cases that should use andi/andis first.
2054     SelectAndParts64(dl, Res, InstCnt);
2055 
2056     // If we've not yet selected a 'starting' instruction, and we have no zeros
2057     // to fill in, select the (Value, RLAmt) with the highest priority (largest
2058     // number of groups), and start with this rotated value.
2059     if ((!HasZeros || LateMask) && !Res) {
2060       // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2061       // groups will come first, and so the VRI representing the largest number
2062       // of groups might not be first (it might be the first Repl32 groups).
2063       unsigned MaxGroupsIdx = 0;
2064       if (!ValueRotsVec[0].Repl32) {
2065         for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2066           if (ValueRotsVec[i].Repl32) {
2067             if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2068               MaxGroupsIdx = i;
2069             break;
2070           }
2071       }
2072 
2073       ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2074       bool NeedsRotate = false;
2075       if (VRI.RLAmt) {
2076         NeedsRotate = true;
2077       } else if (VRI.Repl32) {
2078         for (auto &BG : BitGroups) {
2079           if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2080               BG.Repl32 != VRI.Repl32)
2081             continue;
2082 
2083           // We don't need a rotate if the bit group is confined to the lower
2084           // 32 bits.
2085           if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2086             continue;
2087 
2088           NeedsRotate = true;
2089           break;
2090         }
2091       }
2092 
2093       if (NeedsRotate)
2094         Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2095                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2096                               InstCnt);
2097       else
2098         Res = VRI.V;
2099 
2100       // Now, remove all groups with this underlying value and rotation factor.
2101       if (Res)
2102         eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2103           return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2104                  BG.Repl32 == VRI.Repl32;
2105         });
2106     }
2107 
2108     // Because 64-bit rotates are more flexible than inserts, we might have a
2109     // preference regarding which one we do first (to save one instruction).
2110     if (!Res)
2111       for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2112         if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2113                                 false) <
2114             SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2115                                 true)) {
2116           if (I != BitGroups.begin()) {
2117             BitGroup BG = *I;
2118             BitGroups.erase(I);
2119             BitGroups.insert(BitGroups.begin(), BG);
2120           }
2121 
2122           break;
2123         }
2124       }
2125 
2126     // Insert the other groups (one at a time).
2127     for (auto &BG : BitGroups) {
2128       if (!Res)
2129         Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2130                               BG.EndIdx, InstCnt);
2131       else
2132         Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2133                                  BG.StartIdx, BG.EndIdx, InstCnt);
2134     }
2135 
2136     if (LateMask) {
2137       uint64_t Mask = getZerosMask();
2138 
2139       // We can use the 32-bit andi/andis technique if the mask does not
2140       // require any higher-order bits. This can save an instruction compared
2141       // to always using the general 64-bit technique.
2142       bool Use32BitInsts = isUInt<32>(Mask);
2143       // Compute the masks for andi/andis that would be necessary.
2144       unsigned ANDIMask = (Mask & UINT16_MAX),
2145                ANDISMask = (Mask >> 16) & UINT16_MAX;
2146 
2147       if (Use32BitInsts) {
2148         assert((ANDIMask != 0 || ANDISMask != 0) &&
2149                "No set bits in mask when using 32-bit ands for 64-bit value");
2150 
2151         if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2152                                  (unsigned) (ANDISMask != 0) +
2153                                  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2154 
2155         SDValue ANDIVal, ANDISVal;
2156         if (ANDIMask != 0)
2157           ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
2158                               ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
2159         if (ANDISMask != 0)
2160           ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
2161                                ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);
2162 
2163         if (!ANDIVal)
2164           Res = ANDISVal;
2165         else if (!ANDISVal)
2166           Res = ANDIVal;
2167         else
2168           Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2169                           ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2170       } else {
2171         if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
2172 
2173         SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2174         Res =
2175           SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2176                                          ExtendToInt64(Res, dl), MaskVal), 0);
2177       }
2178     }
2179 
2180     return Res.getNode();
2181   }
2182 
2183   SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2184     // Fill in BitGroups.
2185     collectBitGroups(LateMask);
2186     if (BitGroups.empty())
2187       return nullptr;
2188 
2189     // For 64-bit values, figure out when we can use 32-bit instructions.
2190     if (Bits.size() == 64)
2191       assignRepl32BitGroups();
2192 
2193     // Fill in ValueRotsVec.
2194     collectValueRotInfo();
2195 
2196     if (Bits.size() == 32) {
2197       return Select32(N, LateMask, InstCnt);
2198     } else {
2199       assert(Bits.size() == 64 && "Not 64 bits here?");
2200       return Select64(N, LateMask, InstCnt);
2201     }
2202 
2203     return nullptr;
2204   }
2205 
2206   void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2207     BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
2208   }
2209 
2210   SmallVector<ValueBit, 64> Bits;
2211 
2212   bool HasZeros;
2213   SmallVector<unsigned, 64> RLAmt;
2214 
2215   SmallVector<BitGroup, 16> BitGroups;
2216 
2217   DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2218   SmallVector<ValueRotInfo, 16> ValueRotsVec;
2219 
2220   SelectionDAG *CurDAG;
2221 
2222 public:
2223   BitPermutationSelector(SelectionDAG *DAG)
2224     : CurDAG(DAG) {}
2225 
2226   // Here we try to match complex bit permutations into a set of
2227   // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2228   // known to produce optimial code for common cases (like i32 byte swapping).
2229   SDNode *Select(SDNode *N) {
2230     Memoizer.clear();
2231     auto Result =
2232         getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2233     if (!Result.first)
2234       return nullptr;
2235     Bits = std::move(*Result.second);
2236 
2237     DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2238                     " selection for:    ");
2239     DEBUG(N->dump(CurDAG));
2240 
2241     // Fill it RLAmt and set HasZeros.
2242     computeRotationAmounts();
2243 
2244     if (!HasZeros)
2245       return Select(N, false);
2246 
2247     // We currently have two techniques for handling results with zeros: early
2248     // masking (the default) and late masking. Late masking is sometimes more
2249     // efficient, but because the structure of the bit groups is different, it
2250     // is hard to tell without generating both and comparing the results. With
2251     // late masking, we ignore zeros in the resulting value when inserting each
2252     // set of bit groups, and then mask in the zeros at the end. With early
2253     // masking, we only insert the non-zero parts of the result at every step.
2254 
2255     unsigned InstCnt, InstCntLateMask;
2256     DEBUG(dbgs() << "\tEarly masking:\n");
2257     SDNode *RN = Select(N, false, &InstCnt);
2258     DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2259 
2260     DEBUG(dbgs() << "\tLate masking:\n");
2261     SDNode *RNLM = Select(N, true, &InstCntLateMask);
2262     DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
2263                     " instructions\n");
2264 
2265     if (InstCnt <= InstCntLateMask) {
2266       DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2267       return RN;
2268     }
2269 
2270     DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2271     return RNLM;
2272   }
2273 };
2274 
2275 class IntegerCompareEliminator {
2276   SelectionDAG *CurDAG;
2277   PPCDAGToDAGISel *S;
2278   // Conversion type for interpreting results of a 32-bit instruction as
2279   // a 64-bit value or vice versa.
2280   enum ExtOrTruncConversion { Ext, Trunc };
2281 
2282   // Modifiers to guide how an ISD::SETCC node's result is to be computed
2283   // in a GPR.
2284   // ZExtOrig - use the original condition code, zero-extend value
2285   // ZExtInvert - invert the condition code, zero-extend value
2286   // SExtOrig - use the original condition code, sign-extend value
2287   // SExtInvert - invert the condition code, sign-extend value
2288   enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2289 
2290   // Comparisons against zero to emit GPR code sequences for. Each of these
2291   // sequences may need to be emitted for two or more equivalent patterns.
2292   // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2293   // matters as well as the extension type: sext (-1/0), zext (1/0).
2294   // GEZExt - (zext (LHS >= 0))
2295   // GESExt - (sext (LHS >= 0))
2296   // LEZExt - (zext (LHS <= 0))
2297   // LESExt - (sext (LHS <= 0))
2298   enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2299 
2300   SDNode *tryEXTEND(SDNode *N);
2301   SDNode *tryLogicOpOfCompares(SDNode *N);
2302   SDValue computeLogicOpInGPR(SDValue LogicOp);
2303   SDValue signExtendInputIfNeeded(SDValue Input);
2304   SDValue zeroExtendInputIfNeeded(SDValue Input);
2305   SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2306   SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2307                                         ZeroCompare CmpTy);
2308   SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2309                               int64_t RHSValue, SDLoc dl);
2310  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2311                               int64_t RHSValue, SDLoc dl);
2312   SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2313                               int64_t RHSValue, SDLoc dl);
2314   SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2315                               int64_t RHSValue, SDLoc dl);
2316   SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2317 
2318 public:
2319   IntegerCompareEliminator(SelectionDAG *DAG,
2320                            PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2321     assert(CurDAG->getTargetLoweringInfo()
2322            .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2323            "Only expecting to use this on 64 bit targets.");
2324   }
2325   SDNode *Select(SDNode *N) {
2326     if (CmpInGPR == ICGPR_None)
2327       return nullptr;
2328     switch (N->getOpcode()) {
2329     default: break;
2330     case ISD::ZERO_EXTEND:
2331       if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2332           CmpInGPR == ICGPR_SextI64)
2333         return nullptr;
2334       LLVM_FALLTHROUGH;
2335     case ISD::SIGN_EXTEND:
2336       if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2337           CmpInGPR == ICGPR_ZextI64)
2338         return nullptr;
2339       return tryEXTEND(N);
2340     case ISD::AND:
2341     case ISD::OR:
2342     case ISD::XOR:
2343       return tryLogicOpOfCompares(N);
2344     }
2345     return nullptr;
2346   }
2347 };
2348 
2349 static bool isLogicOp(unsigned Opc) {
2350   return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2351 }
2352 // The obvious case for wanting to keep the value in a GPR. Namely, the
2353 // result of the comparison is actually needed in a GPR.
2354 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2355   assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2356           N->getOpcode() == ISD::SIGN_EXTEND) &&
2357          "Expecting a zero/sign extend node!");
2358   SDValue WideRes;
2359   // If we are zero-extending the result of a logical operation on i1
2360   // values, we can keep the values in GPRs.
2361   if (isLogicOp(N->getOperand(0).getOpcode()) &&
2362       N->getOperand(0).getValueType() == MVT::i1 &&
2363       N->getOpcode() == ISD::ZERO_EXTEND)
2364     WideRes = computeLogicOpInGPR(N->getOperand(0));
2365   else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2366     return nullptr;
2367   else
2368     WideRes =
2369       getSETCCInGPR(N->getOperand(0),
2370                     N->getOpcode() == ISD::SIGN_EXTEND ?
2371                     SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2372 
2373   if (!WideRes)
2374     return nullptr;
2375 
2376   SDLoc dl(N);
2377   bool Input32Bit = WideRes.getValueType() == MVT::i32;
2378   bool Output32Bit = N->getValueType(0) == MVT::i32;
2379 
2380   NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2381   NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2382 
2383   SDValue ConvOp = WideRes;
2384   if (Input32Bit != Output32Bit)
2385     ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2386                            ExtOrTruncConversion::Trunc);
2387   return ConvOp.getNode();
2388 }
2389 
2390 // Attempt to perform logical operations on the results of comparisons while
2391 // keeping the values in GPRs. Without doing so, these would end up being
2392 // lowered to CR-logical operations which suffer from significant latency and
2393 // low ILP.
2394 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2395   if (N->getValueType(0) != MVT::i1)
2396     return nullptr;
2397   assert(isLogicOp(N->getOpcode()) &&
2398          "Expected a logic operation on setcc results.");
2399   SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2400   if (!LoweredLogical)
2401     return nullptr;
2402 
2403   SDLoc dl(N);
2404   bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
2405   unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2406   SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2407   SDValue LHS = LoweredLogical.getOperand(0);
2408   SDValue RHS = LoweredLogical.getOperand(1);
2409   SDValue WideOp;
2410   SDValue OpToConvToRecForm;
2411 
2412   // Look through any 32-bit to 64-bit implicit extend nodes to find the
2413   // opcode that is input to the XORI.
2414   if (IsBitwiseNegate &&
2415       LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
2416     OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
2417   else if (IsBitwiseNegate)
2418     // If the input to the XORI isn't an extension, that's what we're after.
2419     OpToConvToRecForm = LoweredLogical.getOperand(0);
2420   else
2421     // If this is not an XORI, it is a reg-reg logical op and we can convert
2422     // it to record-form.
2423     OpToConvToRecForm = LoweredLogical;
2424 
2425   // Get the record-form version of the node we're looking to use to get the
2426   // CR result from.
2427   uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
2428   int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2429 
2430   // Convert the right node to record-form. This is either the logical we're
2431   // looking at or it is the input node to the negation (if we're looking at
2432   // a bitwise negation).
2433   if (NewOpc != -1 && IsBitwiseNegate) {
2434     // The input to the XORI has a record-form. Use it.
2435     assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
2436            "Expected a PPC::XORI8 only for bitwise negation.");
2437     // Emit the record-form instruction.
2438     std::vector<SDValue> Ops;
2439     for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
2440       Ops.push_back(OpToConvToRecForm.getOperand(i));
2441 
2442     WideOp =
2443       SDValue(CurDAG->getMachineNode(NewOpc, dl,
2444                                      OpToConvToRecForm.getValueType(),
2445                                      MVT::Glue, Ops), 0);
2446   } else {
2447     assert((NewOpc != -1 || !IsBitwiseNegate) &&
2448            "No record form available for AND8/OR8/XOR8?");
2449     WideOp =
2450       SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
2451                                      MVT::i64, MVT::Glue, LHS, RHS), 0);
2452   }
2453 
2454   // Select this node to a single bit from CR0 set by the record-form node
2455   // just created. For bitwise negation, use the EQ bit which is the equivalent
2456   // of negating the result (i.e. it is a bit set when the result of the
2457   // operation is zero).
2458   SDValue SRIdxVal =
2459     CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
2460   SDValue CRBit =
2461     SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2462                                    MVT::i1, CR0Reg, SRIdxVal,
2463                                    WideOp.getValue(1)), 0);
2464   return CRBit.getNode();
2465 }
2466 
2467 // Lower a logical operation on i1 values into a GPR sequence if possible.
2468 // The result can be kept in a GPR if requested.
2469 // Three types of inputs can be handled:
2470 // - SETCC
2471 // - TRUNCATE
2472 // - Logical operation (AND/OR/XOR)
2473 // There is also a special case that is handled (namely a complement operation
2474 // achieved with xor %a, -1).
2475 SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
2476   assert(isLogicOp(LogicOp.getOpcode()) &&
2477         "Can only handle logic operations here.");
2478   assert(LogicOp.getValueType() == MVT::i1 &&
2479          "Can only handle logic operations on i1 values here.");
2480   SDLoc dl(LogicOp);
2481   SDValue LHS, RHS;
2482 
2483  // Special case: xor %a, -1
2484   bool IsBitwiseNegation = isBitwiseNot(LogicOp);
2485 
2486   // Produces a GPR sequence for each operand of the binary logic operation.
2487   // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2488   // the value in a GPR and for logic operations, it will recursively produce
2489   // a GPR sequence for the operation.
2490  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
2491     unsigned OperandOpcode = Operand.getOpcode();
2492     if (OperandOpcode == ISD::SETCC)
2493       return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2494     else if (OperandOpcode == ISD::TRUNCATE) {
2495       SDValue InputOp = Operand.getOperand(0);
2496      EVT InVT = InputOp.getValueType();
2497       return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
2498                                             PPC::RLDICL, dl, InVT, InputOp,
2499                                             S->getI64Imm(0, dl),
2500                                             S->getI64Imm(63, dl)), 0);
2501     } else if (isLogicOp(OperandOpcode))
2502       return computeLogicOpInGPR(Operand);
2503     return SDValue();
2504   };
2505   LHS = getLogicOperand(LogicOp.getOperand(0));
2506   RHS = getLogicOperand(LogicOp.getOperand(1));
2507 
2508   // If a GPR sequence can't be produced for the LHS we can't proceed.
2509   // Not producing a GPR sequence for the RHS is only a problem if this isn't
2510   // a bitwise negation operation.
2511   if (!LHS || (!RHS && !IsBitwiseNegation))
2512     return SDValue();
2513 
2514   NumLogicOpsOnComparison++;
2515 
2516   // We will use the inputs as 64-bit values.
2517   if (LHS.getValueType() == MVT::i32)
2518     LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
2519   if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
2520     RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
2521 
2522   unsigned NewOpc;
2523   switch (LogicOp.getOpcode()) {
2524   default: llvm_unreachable("Unknown logic operation.");
2525   case ISD::AND: NewOpc = PPC::AND8; break;
2526   case ISD::OR:  NewOpc = PPC::OR8;  break;
2527   case ISD::XOR: NewOpc = PPC::XOR8; break;
2528   }
2529 
2530   if (IsBitwiseNegation) {
2531     RHS = S->getI64Imm(1, dl);
2532     NewOpc = PPC::XORI8;
2533   }
2534 
2535   return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
2536 
2537 }
2538 
2539 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2540 /// Otherwise just reinterpret it as a 64-bit value.
2541 /// Useful when emitting comparison code for 32-bit values without using
2542 /// the compare instruction (which only considers the lower 32-bits).
2543 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2544   assert(Input.getValueType() == MVT::i32 &&
2545          "Can only sign-extend 32-bit values here.");
2546   unsigned Opc = Input.getOpcode();
2547 
2548   // The value was sign extended and then truncated to 32-bits. No need to
2549   // sign extend it again.
2550   if (Opc == ISD::TRUNCATE &&
2551       (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2552        Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2553     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2554 
2555   LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2556   // The input is a sign-extending load. All ppc sign-extending loads
2557   // sign-extend to the full 64-bits.
2558   if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
2559     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2560 
2561   ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2562   // We don't sign-extend constants.
2563   if (InputConst)
2564     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2565 
2566   SDLoc dl(Input);
2567   SignExtensionsAdded++;
2568   return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
2569                                         MVT::i64, Input), 0);
2570 }
2571 
2572 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2573 /// Otherwise just reinterpret it as a 64-bit value.
2574 /// Useful when emitting comparison code for 32-bit values without using
2575 /// the compare instruction (which only considers the lower 32-bits).
2576 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
2577   assert(Input.getValueType() == MVT::i32 &&
2578          "Can only zero-extend 32-bit values here.");
2579   unsigned Opc = Input.getOpcode();
2580 
2581   // The only condition under which we can omit the actual extend instruction:
2582   // - The value is a positive constant
2583   // - The value comes from a load that isn't a sign-extending load
2584   // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2585   bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
2586     (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
2587      Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
2588   if (IsTruncateOfZExt)
2589     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2590 
2591   ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2592   if (InputConst && InputConst->getSExtValue() >= 0)
2593     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2594 
2595   LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2596   // The input is a load that doesn't sign-extend (it will be zero-extended).
2597   if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
2598     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2599 
2600   // None of the above, need to zero-extend.
2601   SDLoc dl(Input);
2602   ZeroExtensionsAdded++;
2603   return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
2604                                         S->getI64Imm(0, dl),
2605                                         S->getI64Imm(32, dl)), 0);
2606 }
2607 
2608 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2609 // course not actual zero/sign extensions that will generate machine code,
2610 // they're just a way to reinterpret a 32 bit value in a register as a
2611 // 64 bit value and vice-versa.
2612 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
2613                                                 ExtOrTruncConversion Conv) {
2614   SDLoc dl(NatWidthRes);
2615 
2616   // For reinterpreting 32-bit values as 64 bit values, we generate
2617   // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2618   if (Conv == ExtOrTruncConversion::Ext) {
2619     SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
2620     SDValue SubRegIdx =
2621       CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2622     return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
2623                                           ImDef, NatWidthRes, SubRegIdx), 0);
2624   }
2625 
2626   assert(Conv == ExtOrTruncConversion::Trunc &&
2627          "Unknown convertion between 32 and 64 bit values.");
2628   // For reinterpreting 64-bit values as 32-bit values, we just need to
2629   // EXTRACT_SUBREG (i.e. extract the low word).
2630   SDValue SubRegIdx =
2631     CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2632   return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
2633                                         NatWidthRes, SubRegIdx), 0);
2634 }
2635 
2636 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2637 // Handle both zero-extensions and sign-extensions.
2638 SDValue
2639 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2640                                                          ZeroCompare CmpTy) {
2641   EVT InVT = LHS.getValueType();
2642   bool Is32Bit = InVT == MVT::i32;
2643   SDValue ToExtend;
2644 
2645   // Produce the value that needs to be either zero or sign extended.
2646   switch (CmpTy) {
2647   case ZeroCompare::GEZExt:
2648   case ZeroCompare::GESExt:
2649     ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
2650                                               dl, InVT, LHS, LHS), 0);
2651     break;
2652   case ZeroCompare::LEZExt:
2653   case ZeroCompare::LESExt: {
2654     if (Is32Bit) {
2655       // Upper 32 bits cannot be undefined for this sequence.
2656       LHS = signExtendInputIfNeeded(LHS);
2657       SDValue Neg =
2658         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2659       ToExtend =
2660         SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2661                                        Neg, S->getI64Imm(1, dl),
2662                                        S->getI64Imm(63, dl)), 0);
2663     } else {
2664       SDValue Addi =
2665         SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
2666                                        S->getI64Imm(~0ULL, dl)), 0);
2667       ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2668                                                 Addi, LHS), 0);
2669     }
2670     break;
2671   }
2672   }
2673 
2674   // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2675   if (!Is32Bit &&
2676       (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
2677     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2678                                           ToExtend, S->getI64Imm(1, dl),
2679                                           S->getI64Imm(63, dl)), 0);
2680   if (!Is32Bit &&
2681       (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
2682     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
2683                                           S->getI64Imm(63, dl)), 0);
2684 
2685   assert(Is32Bit && "Should have handled the 32-bit sequences above.");
2686   // For 32-bit sequences, the extensions differ between GE/LE cases.
2687   switch (CmpTy) {
2688   case ZeroCompare::GEZExt: {
2689     SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2690                            S->getI32Imm(31, dl) };
2691     return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2692                                           ShiftOps), 0);
2693   }
2694   case ZeroCompare::GESExt:
2695     return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
2696                                           S->getI32Imm(31, dl)), 0);
2697   case ZeroCompare::LEZExt:
2698     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
2699                                           S->getI32Imm(1, dl)), 0);
2700   case ZeroCompare::LESExt:
2701     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
2702                                           S->getI32Imm(-1, dl)), 0);
2703   }
2704 
2705   // The above case covers all the enumerators so it can't have a default clause
2706   // to avoid compiler warnings.
2707   llvm_unreachable("Unknown zero-comparison type.");
2708 }
2709 
2710 /// Produces a zero-extended result of comparing two 32-bit values according to
2711 /// the passed condition code.
2712 SDValue
2713 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
2714                                               ISD::CondCode CC,
2715                                               int64_t RHSValue, SDLoc dl) {
2716   if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2717       CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
2718     return SDValue();
2719   bool IsRHSZero = RHSValue == 0;
2720   bool IsRHSOne = RHSValue == 1;
2721   bool IsRHSNegOne = RHSValue == -1LL;
2722   switch (CC) {
2723   default: return SDValue();
2724   case ISD::SETEQ: {
2725     // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2726     // (zext (setcc %a, 0, seteq))  -> (lshr (cntlzw %a), 5)
2727     SDValue Xor = IsRHSZero ? LHS :
2728       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2729     SDValue Clz =
2730       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2731     SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2732       S->getI32Imm(31, dl) };
2733     return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2734                                           ShiftOps), 0);
2735   }
2736   case ISD::SETNE: {
2737     // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2738     // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
2739     SDValue Xor = IsRHSZero ? LHS :
2740       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2741     SDValue Clz =
2742       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2743     SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2744       S->getI32Imm(31, dl) };
2745     SDValue Shift =
2746       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2747     return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2748                                           S->getI32Imm(1, dl)), 0);
2749   }
2750   case ISD::SETGE: {
2751     // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2752     // (zext (setcc %a, 0, setge))  -> (lshr (~ %a), 31)
2753     if(IsRHSZero)
2754       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2755 
2756     // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2757     // by swapping inputs and falling through.
2758     std::swap(LHS, RHS);
2759     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2760     IsRHSZero = RHSConst && RHSConst->isNullValue();
2761     LLVM_FALLTHROUGH;
2762   }
2763   case ISD::SETLE: {
2764     if (CmpInGPR == ICGPR_NonExtIn)
2765       return SDValue();
2766     // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2767     // (zext (setcc %a, 0, setle))  -> (xor (lshr (- %a), 63), 1)
2768     if(IsRHSZero) {
2769       if (CmpInGPR == ICGPR_NonExtIn)
2770         return SDValue();
2771       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2772     }
2773 
2774     // The upper 32-bits of the register can't be undefined for this sequence.
2775     LHS = signExtendInputIfNeeded(LHS);
2776     RHS = signExtendInputIfNeeded(RHS);
2777     SDValue Sub =
2778       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2779     SDValue Shift =
2780       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
2781                                      S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
2782               0);
2783     return
2784       SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
2785                                      MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
2786   }
2787   case ISD::SETGT: {
2788     // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2789     // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2790     // (zext (setcc %a, 0, setgt))  -> (lshr (- %a), 63)
2791     // Handle SETLT -1 (which is equivalent to SETGE 0).
2792     if (IsRHSNegOne)
2793       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2794 
2795     if (IsRHSZero) {
2796       if (CmpInGPR == ICGPR_NonExtIn)
2797         return SDValue();
2798       // The upper 32-bits of the register can't be undefined for this sequence.
2799       LHS = signExtendInputIfNeeded(LHS);
2800       RHS = signExtendInputIfNeeded(RHS);
2801       SDValue Neg =
2802         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2803       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2804                      Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
2805     }
2806     // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2807     // (%b < %a) by swapping inputs and falling through.
2808     std::swap(LHS, RHS);
2809     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2810     IsRHSZero = RHSConst && RHSConst->isNullValue();
2811     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2812     LLVM_FALLTHROUGH;
2813   }
2814   case ISD::SETLT: {
2815     // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2816     // (zext (setcc %a, 1, setlt))  -> (xor (lshr (- %a), 63), 1)
2817     // (zext (setcc %a, 0, setlt))  -> (lshr %a, 31)
2818     // Handle SETLT 1 (which is equivalent to SETLE 0).
2819     if (IsRHSOne) {
2820       if (CmpInGPR == ICGPR_NonExtIn)
2821         return SDValue();
2822       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2823     }
2824 
2825     if (IsRHSZero) {
2826       SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2827                              S->getI32Imm(31, dl) };
2828       return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2829                                             ShiftOps), 0);
2830     }
2831 
2832     if (CmpInGPR == ICGPR_NonExtIn)
2833       return SDValue();
2834     // The upper 32-bits of the register can't be undefined for this sequence.
2835     LHS = signExtendInputIfNeeded(LHS);
2836     RHS = signExtendInputIfNeeded(RHS);
2837     SDValue SUBFNode =
2838       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2839     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2840                                     SUBFNode, S->getI64Imm(1, dl),
2841                                     S->getI64Imm(63, dl)), 0);
2842   }
2843   case ISD::SETUGE:
2844     // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
2845     // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
2846     std::swap(LHS, RHS);
2847     LLVM_FALLTHROUGH;
2848   case ISD::SETULE: {
2849     if (CmpInGPR == ICGPR_NonExtIn)
2850       return SDValue();
2851     // The upper 32-bits of the register can't be undefined for this sequence.
2852     LHS = zeroExtendInputIfNeeded(LHS);
2853     RHS = zeroExtendInputIfNeeded(RHS);
2854     SDValue Subtract =
2855       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2856     SDValue SrdiNode =
2857       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2858                                           Subtract, S->getI64Imm(1, dl),
2859                                           S->getI64Imm(63, dl)), 0);
2860     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
2861                                             S->getI32Imm(1, dl)), 0);
2862   }
2863   case ISD::SETUGT:
2864     // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
2865     // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
2866     std::swap(LHS, RHS);
2867     LLVM_FALLTHROUGH;
2868   case ISD::SETULT: {
2869     if (CmpInGPR == ICGPR_NonExtIn)
2870       return SDValue();
2871     // The upper 32-bits of the register can't be undefined for this sequence.
2872     LHS = zeroExtendInputIfNeeded(LHS);
2873     RHS = zeroExtendInputIfNeeded(RHS);
2874     SDValue Subtract =
2875       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
2876     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2877                                           Subtract, S->getI64Imm(1, dl),
2878                                           S->getI64Imm(63, dl)), 0);
2879   }
2880   }
2881 }
2882 
2883 /// Produces a sign-extended result of comparing two 32-bit values according to
2884 /// the passed condition code.
2885 SDValue
2886 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
2887                                               ISD::CondCode CC,
2888                                               int64_t RHSValue, SDLoc dl) {
2889   if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2890       CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
2891     return SDValue();
2892   bool IsRHSZero = RHSValue == 0;
2893   bool IsRHSOne = RHSValue == 1;
2894   bool IsRHSNegOne = RHSValue == -1LL;
2895 
2896   switch (CC) {
2897   default: return SDValue();
2898   case ISD::SETEQ: {
    // (sext (setcc %a, %b, seteq)) ->
    //   (neg (lshr (ctlz (xor %a, %b)), 5))
    // (sext (setcc %a, 0, seteq)) ->
    //   (neg (lshr (ctlz %a), 5))
2903     SDValue CountInput = IsRHSZero ? LHS :
2904       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2905     SDValue Cntlzw =
2906       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
2907     SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
2908                          S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
2909     SDValue Slwi =
2910       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
2911     return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
2912   }
2913   case ISD::SETNE: {
2914     // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
2915     // flip the bit, finally take 2's complement.
2916     // (sext (setcc %a, %b, setne)) ->
2917     //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
2918     // Same as above, but the first xor is not needed.
2919     // (sext (setcc %a, 0, setne)) ->
2920     //   (neg (xor (lshr (ctlz %a), 5), 1))
2921     SDValue Xor = IsRHSZero ? LHS :
2922       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2923     SDValue Clz =
2924       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2925     SDValue ShiftOps[] =
2926       { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
2927     SDValue Shift =
2928       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2929     SDValue Xori =
2930       SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2931                                      S->getI32Imm(1, dl)), 0);
2932     return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
2933   }
2934   case ISD::SETGE: {
2935     // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
2936     // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
2937     if (IsRHSZero)
2938       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
2939 
2940     // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2941     // by swapping inputs and falling through.
2942     std::swap(LHS, RHS);
2943     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2944     IsRHSZero = RHSConst && RHSConst->isNullValue();
2945     LLVM_FALLTHROUGH;
2946   }
2947   case ISD::SETLE: {
2948     if (CmpInGPR == ICGPR_NonExtIn)
2949       return SDValue();
    // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
2951     // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
2952     if (IsRHSZero)
2953       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
2954 
2955     // The upper 32-bits of the register can't be undefined for this sequence.
2956     LHS = signExtendInputIfNeeded(LHS);
2957     RHS = signExtendInputIfNeeded(RHS);
2958     SDValue SUBFNode =
2959       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
2960                                      LHS, RHS), 0);
2961     SDValue Srdi =
2962       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2963                                      SUBFNode, S->getI64Imm(1, dl),
2964                                      S->getI64Imm(63, dl)), 0);
2965     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
2966                                           S->getI32Imm(-1, dl)), 0);
2967   }
2968   case ISD::SETGT: {
2969     // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
2970     // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
2971     // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
2972     if (IsRHSNegOne)
2973       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
2974     if (IsRHSZero) {
2975       if (CmpInGPR == ICGPR_NonExtIn)
2976         return SDValue();
2977       // The upper 32-bits of the register can't be undefined for this sequence.
2978       LHS = signExtendInputIfNeeded(LHS);
2979       RHS = signExtendInputIfNeeded(RHS);
2980       SDValue Neg =
2981         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2982         return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
2983                                               S->getI64Imm(63, dl)), 0);
2984     }
2985     // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2986     // (%b < %a) by swapping inputs and falling through.
2987     std::swap(LHS, RHS);
2988     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2989     IsRHSZero = RHSConst && RHSConst->isNullValue();
2990     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2991     LLVM_FALLTHROUGH;
2992   }
2993   case ISD::SETLT: {
2994     // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
2995     // (sext (setcc %a, 1, setgt))  -> (add (lshr (- %a), 63), -1)
2996     // (sext (setcc %a, 0, setgt))  -> (ashr %a, 31)
2997     if (IsRHSOne) {
2998       if (CmpInGPR == ICGPR_NonExtIn)
2999         return SDValue();
3000       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3001     }
3002     if (IsRHSZero)
3003       return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3004                                             S->getI32Imm(31, dl)), 0);
3005 
3006     if (CmpInGPR == ICGPR_NonExtIn)
3007       return SDValue();
3008     // The upper 32-bits of the register can't be undefined for this sequence.
3009     LHS = signExtendInputIfNeeded(LHS);
3010     RHS = signExtendInputIfNeeded(RHS);
3011     SDValue SUBFNode =
3012       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3013     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3014                                           SUBFNode, S->getI64Imm(63, dl)), 0);
3015   }
3016   case ISD::SETUGE:
3017     // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3018     // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3019     std::swap(LHS, RHS);
3020     LLVM_FALLTHROUGH;
3021   case ISD::SETULE: {
3022     if (CmpInGPR == ICGPR_NonExtIn)
3023       return SDValue();
3024     // The upper 32-bits of the register can't be undefined for this sequence.
3025     LHS = zeroExtendInputIfNeeded(LHS);
3026     RHS = zeroExtendInputIfNeeded(RHS);
3027     SDValue Subtract =
3028       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3029     SDValue Shift =
3030       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3031                                      S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3032               0);
3033     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3034                                           S->getI32Imm(-1, dl)), 0);
3035   }
3036   case ISD::SETUGT:
3037     // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3038     // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3039     std::swap(LHS, RHS);
3040     LLVM_FALLTHROUGH;
3041   case ISD::SETULT: {
3042     if (CmpInGPR == ICGPR_NonExtIn)
3043       return SDValue();
3044     // The upper 32-bits of the register can't be undefined for this sequence.
3045     LHS = zeroExtendInputIfNeeded(LHS);
3046     RHS = zeroExtendInputIfNeeded(RHS);
3047     SDValue Subtract =
3048       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3049     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3050                                           Subtract, S->getI64Imm(63, dl)), 0);
3051   }
3052   }
3053 }
3054 
3055 /// Produces a zero-extended result of comparing two 64-bit values according to
3056 /// the passed condition code.
3057 SDValue
3058 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3059                                               ISD::CondCode CC,
3060                                               int64_t RHSValue, SDLoc dl) {
3061   if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3062       CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
3063     return SDValue();
3064   bool IsRHSZero = RHSValue == 0;
3065   bool IsRHSOne = RHSValue == 1;
3066   bool IsRHSNegOne = RHSValue == -1LL;
3067   switch (CC) {
3068   default: return SDValue();
3069   case ISD::SETEQ: {
3070     // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3071     // (zext (setcc %a, 0, seteq)) ->  (lshr (ctlz %a), 6)
3072     SDValue Xor = IsRHSZero ? LHS :
3073       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3074     SDValue Clz =
3075       SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3076     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3077                                           S->getI64Imm(58, dl),
3078                                           S->getI64Imm(63, dl)), 0);
3079   }
3080   case ISD::SETNE: {
3081     // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3082     // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3083     // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3084     // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3085     SDValue Xor = IsRHSZero ? LHS :
3086       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3087     SDValue AC =
3088       SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3089                                      Xor, S->getI32Imm(~0U, dl)), 0);
3090     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3091                                           Xor, AC.getValue(1)), 0);
3092   }
3093   case ISD::SETGE: {
3094     // {subc.reg, subc.CA} = (subcarry %a, %b)
3095     // (zext (setcc %a, %b, setge)) ->
3096     //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3097     // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3098     if (IsRHSZero)
3099       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3100     std::swap(LHS, RHS);
3101     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3102     IsRHSZero = RHSConst && RHSConst->isNullValue();
3103     LLVM_FALLTHROUGH;
3104   }
3105   case ISD::SETLE: {
3106     // {subc.reg, subc.CA} = (subcarry %b, %a)
3107     // (zext (setcc %a, %b, setge)) ->
3108     //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3109     // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3110     if (IsRHSZero)
3111       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3112     SDValue ShiftL =
3113       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3114                                      S->getI64Imm(1, dl),
3115                                      S->getI64Imm(63, dl)), 0);
3116     SDValue ShiftR =
3117       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3118                                      S->getI64Imm(63, dl)), 0);
3119     SDValue SubtractCarry =
3120       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3121                                      LHS, RHS), 1);
3122     return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3123                                           ShiftR, ShiftL, SubtractCarry), 0);
3124   }
3125   case ISD::SETGT: {
3126     // {subc.reg, subc.CA} = (subcarry %b, %a)
3127     // (zext (setcc %a, %b, setgt)) ->
3128     //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3129     // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3130     if (IsRHSNegOne)
3131       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3132     if (IsRHSZero) {
3133       SDValue Addi =
3134         SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3135                                        S->getI64Imm(~0ULL, dl)), 0);
3136       SDValue Nor =
3137         SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3138       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3139                                             S->getI64Imm(1, dl),
3140                                             S->getI64Imm(63, dl)), 0);
3141     }
3142     std::swap(LHS, RHS);
3143     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3144     IsRHSZero = RHSConst && RHSConst->isNullValue();
3145     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3146     LLVM_FALLTHROUGH;
3147   }
3148   case ISD::SETLT: {
3149     // {subc.reg, subc.CA} = (subcarry %a, %b)
3150     // (zext (setcc %a, %b, setlt)) ->
3151     //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3152     // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3153     if (IsRHSOne)
3154       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3155     if (IsRHSZero)
3156       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3157                                             S->getI64Imm(1, dl),
3158                                             S->getI64Imm(63, dl)), 0);
3159     SDValue SRADINode =
3160       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3161                                      LHS, S->getI64Imm(63, dl)), 0);
3162     SDValue SRDINode =
3163       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3164                                      RHS, S->getI64Imm(1, dl),
3165                                      S->getI64Imm(63, dl)), 0);
3166     SDValue SUBFC8Carry =
3167       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3168                                      RHS, LHS), 1);
3169     SDValue ADDE8Node =
3170       SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3171                                      SRDINode, SRADINode, SUBFC8Carry), 0);
3172     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3173                                           ADDE8Node, S->getI64Imm(1, dl)), 0);
3174   }
3175   case ISD::SETUGE:
3176     // {subc.reg, subc.CA} = (subcarry %a, %b)
3177     // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3178     std::swap(LHS, RHS);
3179     LLVM_FALLTHROUGH;
3180   case ISD::SETULE: {
3181     // {subc.reg, subc.CA} = (subcarry %b, %a)
3182     // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3183     SDValue SUBFC8Carry =
3184       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3185                                      LHS, RHS), 1);
3186     SDValue SUBFE8Node =
3187       SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3188                                      LHS, LHS, SUBFC8Carry), 0);
3189     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3190                                           SUBFE8Node, S->getI64Imm(1, dl)), 0);
3191   }
3192   case ISD::SETUGT:
3193     // {subc.reg, subc.CA} = (subcarry %b, %a)
3194     // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3195     std::swap(LHS, RHS);
3196     LLVM_FALLTHROUGH;
3197   case ISD::SETULT: {
3198     // {subc.reg, subc.CA} = (subcarry %a, %b)
3199     // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3200     SDValue SubtractCarry =
3201       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3202                                      RHS, LHS), 1);
3203     SDValue ExtSub =
3204       SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3205                                      LHS, LHS, SubtractCarry), 0);
3206     return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3207                                           ExtSub), 0);
3208   }
3209   }
3210 }
3211 
3212 /// Produces a sign-extended result of comparing two 64-bit values according to
3213 /// the passed condition code.
3214 SDValue
3215 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3216                                               ISD::CondCode CC,
3217                                               int64_t RHSValue, SDLoc dl) {
3218   if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3219       CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
3220     return SDValue();
3221   bool IsRHSZero = RHSValue == 0;
3222   bool IsRHSOne = RHSValue == 1;
3223   bool IsRHSNegOne = RHSValue == -1LL;
3224   switch (CC) {
3225   default: return SDValue();
3226   case ISD::SETEQ: {
3227     // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3228     // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3229     // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3230     // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3231     SDValue AddInput = IsRHSZero ? LHS :
3232       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3233     SDValue Addic =
3234       SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3235                                      AddInput, S->getI32Imm(~0U, dl)), 0);
3236     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3237                                           Addic, Addic.getValue(1)), 0);
3238   }
3239   case ISD::SETNE: {
3240     // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3241     // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3242     // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3243     // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3244     SDValue Xor = IsRHSZero ? LHS :
3245       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3246     SDValue SC =
3247       SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3248                                      Xor, S->getI32Imm(0, dl)), 0);
3249     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3250                                           SC, SC.getValue(1)), 0);
3251   }
3252   case ISD::SETGE: {
3253     // {subc.reg, subc.CA} = (subcarry %a, %b)
3254     // (zext (setcc %a, %b, setge)) ->
3255     //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3256     // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3257     if (IsRHSZero)
3258       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3259     std::swap(LHS, RHS);
3260     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3261     IsRHSZero = RHSConst && RHSConst->isNullValue();
3262     LLVM_FALLTHROUGH;
3263   }
3264   case ISD::SETLE: {
3265     // {subc.reg, subc.CA} = (subcarry %b, %a)
3266     // (zext (setcc %a, %b, setge)) ->
3267     //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3268     // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3269     if (IsRHSZero)
3270       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3271     SDValue ShiftR =
3272       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3273                                      S->getI64Imm(63, dl)), 0);
3274     SDValue ShiftL =
3275       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3276                                      S->getI64Imm(1, dl),
3277                                      S->getI64Imm(63, dl)), 0);
3278     SDValue SubtractCarry =
3279       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3280                                      LHS, RHS), 1);
3281     SDValue Adde =
3282       SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3283                                      ShiftR, ShiftL, SubtractCarry), 0);
3284     return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3285   }
3286   case ISD::SETGT: {
3287     // {subc.reg, subc.CA} = (subcarry %b, %a)
3288     // (zext (setcc %a, %b, setgt)) ->
3289     //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3290     // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3291     if (IsRHSNegOne)
3292       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3293     if (IsRHSZero) {
3294       SDValue Add =
3295         SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3296                                        S->getI64Imm(-1, dl)), 0);
3297       SDValue Nor =
3298         SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3299       return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3300                                             S->getI64Imm(63, dl)), 0);
3301     }
3302     std::swap(LHS, RHS);
3303     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3304     IsRHSZero = RHSConst && RHSConst->isNullValue();
3305     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3306     LLVM_FALLTHROUGH;
3307   }
3308   case ISD::SETLT: {
3309     // {subc.reg, subc.CA} = (subcarry %a, %b)
3310     // (zext (setcc %a, %b, setlt)) ->
3311     //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3312     // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3313     if (IsRHSOne)
3314       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3315     if (IsRHSZero) {
3316       return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3317                                             S->getI64Imm(63, dl)), 0);
3318     }
3319     SDValue SRADINode =
3320       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3321                                      LHS, S->getI64Imm(63, dl)), 0);
3322     SDValue SRDINode =
3323       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3324                                      RHS, S->getI64Imm(1, dl),
3325                                      S->getI64Imm(63, dl)), 0);
3326     SDValue SUBFC8Carry =
3327       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3328                                      RHS, LHS), 1);
3329     SDValue ADDE8Node =
3330       SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3331                                      SRDINode, SRADINode, SUBFC8Carry), 0);
3332     SDValue XORI8Node =
3333       SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3334                                      ADDE8Node, S->getI64Imm(1, dl)), 0);
3335     return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3336                                           XORI8Node), 0);
3337   }
3338   case ISD::SETUGE:
3339     // {subc.reg, subc.CA} = (subcarry %a, %b)
3340     // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3341     std::swap(LHS, RHS);
3342     LLVM_FALLTHROUGH;
3343   case ISD::SETULE: {
3344     // {subc.reg, subc.CA} = (subcarry %b, %a)
3345     // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3346     SDValue SubtractCarry =
3347       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3348                                      LHS, RHS), 1);
3349     SDValue ExtSub =
3350       SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3351                                      LHS, SubtractCarry), 0);
3352     return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3353                                           ExtSub, ExtSub), 0);
3354   }
3355   case ISD::SETUGT:
3356     // {subc.reg, subc.CA} = (subcarry %b, %a)
3357     // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3358     std::swap(LHS, RHS);
3359     LLVM_FALLTHROUGH;
3360   case ISD::SETULT: {
3361     // {subc.reg, subc.CA} = (subcarry %a, %b)
3362     // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3363     SDValue SubCarry =
3364       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3365                                      RHS, LHS), 1);
3366     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3367                                      LHS, LHS, SubCarry), 0);
3368   }
3369   }
3370 }
3371 
3372 /// Do all uses of this SDValue need the result in a GPR?
3373 /// This is meant to be used on values that have type i1 since
3374 /// it is somewhat meaningless to ask if values of other types
3375 /// should be kept in GPR's.
3376 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3377   assert(Compare.getOpcode() == ISD::SETCC &&
3378          "An ISD::SETCC node required here.");
3379 
3380   // For values that have a single use, the caller should obviously already have
3381   // checked if that use is an extending use. We check the other uses here.
3382   if (Compare.hasOneUse())
3383     return true;
3384   // We want the value in a GPR if it is being extended, used for a select, or
3385   // used in logical operations.
3386   for (auto CompareUse : Compare.getNode()->uses())
3387     if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3388         CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3389         CompareUse->getOpcode() != ISD::SELECT &&
3390         !isLogicOp(CompareUse->getOpcode())) {
3391       OmittedForNonExtendUses++;
3392       return false;
3393     }
3394   return true;
3395 }
3396 
3397 /// Returns an equivalent of a SETCC node but with the result the same width as
3398 /// the inputs. This can nalso be used for SELECT_CC if either the true or false
3399 /// values is a power of two while the other is zero.
3400 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3401                                                 SetccInGPROpts ConvOpts) {
3402   assert((Compare.getOpcode() == ISD::SETCC ||
3403           Compare.getOpcode() == ISD::SELECT_CC) &&
3404          "An ISD::SETCC node required here.");
3405 
3406   // Don't convert this comparison to a GPR sequence because there are uses
3407   // of the i1 result (i.e. uses that require the result in the CR).
3408   if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3409     return SDValue();
3410 
3411   SDValue LHS = Compare.getOperand(0);
3412   SDValue RHS = Compare.getOperand(1);
3413 
3414   // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3415   int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3416   ISD::CondCode CC =
3417     cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3418   EVT InputVT = LHS.getValueType();
3419   if (InputVT != MVT::i32 && InputVT != MVT::i64)
3420     return SDValue();
3421 
3422   if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3423       ConvOpts == SetccInGPROpts::SExtInvert)
3424     CC = ISD::getSetCCInverse(CC, true);
3425 
3426   bool Inputs32Bit = InputVT == MVT::i32;
3427 
3428   SDLoc dl(Compare);
3429   ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3430   int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3431   bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3432     ConvOpts == SetccInGPROpts::SExtInvert;
3433 
3434   if (IsSext && Inputs32Bit)
3435     return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3436   else if (Inputs32Bit)
3437     return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3438   else if (IsSext)
3439     return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3440   return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3441 }
3442 
3443 } // end anonymous namespace
3444 
3445 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3446   if (N->getValueType(0) != MVT::i32 &&
3447       N->getValueType(0) != MVT::i64)
3448     return false;
3449 
3450   // This optimization will emit code that assumes 64-bit registers
3451   // so we don't want to run it in 32-bit mode. Also don't run it
3452   // on functions that are not to be optimized.
3453   if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3454     return false;
3455 
3456   switch (N->getOpcode()) {
3457   default: break;
3458   case ISD::ZERO_EXTEND:
3459   case ISD::SIGN_EXTEND:
3460   case ISD::AND:
3461   case ISD::OR:
3462   case ISD::XOR: {
3463     IntegerCompareEliminator ICmpElim(CurDAG, this);
3464     if (SDNode *New = ICmpElim.Select(N)) {
3465       ReplaceNode(N, New);
3466       return true;
3467     }
3468   }
3469   }
3470   return false;
3471 }
3472 
3473 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3474   if (N->getValueType(0) != MVT::i32 &&
3475       N->getValueType(0) != MVT::i64)
3476     return false;
3477 
3478   if (!UseBitPermRewriter)
3479     return false;
3480 
3481   switch (N->getOpcode()) {
3482   default: break;
3483   case ISD::ROTL:
3484   case ISD::SHL:
3485   case ISD::SRL:
3486   case ISD::AND:
3487   case ISD::OR: {
3488     BitPermutationSelector BPS(CurDAG);
3489     if (SDNode *New = BPS.Select(N)) {
3490       ReplaceNode(N, New);
3491       return true;
3492     }
3493     return false;
3494   }
3495   }
3496 
3497   return false;
3498 }
3499 
3500 /// SelectCC - Select a comparison of the specified values with the specified
3501 /// condition code, returning the CR# of the expression.
3502 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3503                                   const SDLoc &dl) {
3504   // Always select the LHS.
3505   unsigned Opc;
3506 
3507   if (LHS.getValueType() == MVT::i32) {
3508     unsigned Imm;
3509     if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3510       if (isInt32Immediate(RHS, Imm)) {
3511         // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3512         if (isUInt<16>(Imm))
3513           return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3514                                                 getI32Imm(Imm & 0xFFFF, dl)),
3515                          0);
3516         // If this is a 16-bit signed immediate, fold it.
3517         if (isInt<16>((int)Imm))
3518           return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3519                                                 getI32Imm(Imm & 0xFFFF, dl)),
3520                          0);
3521 
3522         // For non-equality comparisons, the default code would materialize the
3523         // constant, then compare against it, like this:
3524         //   lis r2, 4660
3525         //   ori r2, r2, 22136
3526         //   cmpw cr0, r3, r2
3527         // Since we are just comparing for equality, we can emit this instead:
3528         //   xoris r0,r3,0x1234
3529         //   cmplwi cr0,r0,0x5678
3530         //   beq cr0,L6
3531         SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
3532                                            getI32Imm(Imm >> 16, dl)), 0);
3533         return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
3534                                               getI32Imm(Imm & 0xFFFF, dl)), 0);
3535       }
3536       Opc = PPC::CMPLW;
3537     } else if (ISD::isUnsignedIntSetCC(CC)) {
3538       if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
3539         return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
3540                                               getI32Imm(Imm & 0xFFFF, dl)), 0);
3541       Opc = PPC::CMPLW;
3542     } else {
3543       int16_t SImm;
3544       if (isIntS16Immediate(RHS, SImm))
3545         return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
3546                                               getI32Imm((int)SImm & 0xFFFF,
3547                                                         dl)),
3548                          0);
3549       Opc = PPC::CMPW;
3550     }
3551   } else if (LHS.getValueType() == MVT::i64) {
3552     uint64_t Imm;
3553     if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3554       if (isInt64Immediate(RHS.getNode(), Imm)) {
3555         // SETEQ/SETNE comparison with 16-bit immediate, fold it.
3556         if (isUInt<16>(Imm))
3557           return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3558                                                 getI32Imm(Imm & 0xFFFF, dl)),
3559                          0);
3560         // If this is a 16-bit signed immediate, fold it.
3561         if (isInt<16>(Imm))
3562           return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3563                                                 getI32Imm(Imm & 0xFFFF, dl)),
3564                          0);
3565 
3566         // For non-equality comparisons, the default code would materialize the
3567         // constant, then compare against it, like this:
3568         //   lis r2, 4660
3569         //   ori r2, r2, 22136
3570         //   cmpd cr0, r3, r2
3571         // Since we are just comparing for equality, we can emit this instead:
3572         //   xoris r0,r3,0x1234
3573         //   cmpldi cr0,r0,0x5678
3574         //   beq cr0,L6
3575         if (isUInt<32>(Imm)) {
3576           SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
3577                                              getI64Imm(Imm >> 16, dl)), 0);
3578           return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
3579                                                 getI64Imm(Imm & 0xFFFF, dl)),
3580                          0);
3581         }
3582       }
3583       Opc = PPC::CMPLD;
3584     } else if (ISD::isUnsignedIntSetCC(CC)) {
3585       if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
3586         return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
3587                                               getI64Imm(Imm & 0xFFFF, dl)), 0);
3588       Opc = PPC::CMPLD;
3589     } else {
3590       int16_t SImm;
3591       if (isIntS16Immediate(RHS, SImm))
3592         return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
3593                                               getI64Imm(SImm & 0xFFFF, dl)),
3594                          0);
3595       Opc = PPC::CMPD;
3596     }
3597   } else if (LHS.getValueType() == MVT::f32) {
3598     Opc = PPC::FCMPUS;
3599   } else {
3600     assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
3601     Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
3602   }
3603   return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
3604 }
3605 
3606 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
3607   switch (CC) {
3608   case ISD::SETUEQ:
3609   case ISD::SETONE:
3610   case ISD::SETOLE:
3611   case ISD::SETOGE:
3612     llvm_unreachable("Should be lowered by legalize!");
3613   default: llvm_unreachable("Unknown condition!");
3614   case ISD::SETOEQ:
3615   case ISD::SETEQ:  return PPC::PRED_EQ;
3616   case ISD::SETUNE:
3617   case ISD::SETNE:  return PPC::PRED_NE;
3618   case ISD::SETOLT:
3619   case ISD::SETLT:  return PPC::PRED_LT;
3620   case ISD::SETULE:
3621   case ISD::SETLE:  return PPC::PRED_LE;
3622   case ISD::SETOGT:
3623   case ISD::SETGT:  return PPC::PRED_GT;
3624   case ISD::SETUGE:
3625   case ISD::SETGE:  return PPC::PRED_GE;
3626   case ISD::SETO:   return PPC::PRED_NU;
3627   case ISD::SETUO:  return PPC::PRED_UN;
3628     // These two are invalid for floating point.  Assume we have int.
3629   case ISD::SETULT: return PPC::PRED_LT;
3630   case ISD::SETUGT: return PPC::PRED_GT;
3631   }
3632 }
3633 
3634 /// getCRIdxForSetCC - Return the index of the condition register field
3635 /// associated with the SetCC condition, and whether or not the field is
3636 /// treated as inverted.  That is, lt = 0; ge = 0 inverted.
3637 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
3638   Invert = false;
3639   switch (CC) {
3640   default: llvm_unreachable("Unknown condition!");
3641   case ISD::SETOLT:
3642   case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
3643   case ISD::SETOGT:
3644   case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
3645   case ISD::SETOEQ:
3646   case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
3647   case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
3648   case ISD::SETUGE:
3649   case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
3650   case ISD::SETULE:
3651   case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
3652   case ISD::SETUNE:
3653   case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
3654   case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
3655   case ISD::SETUEQ:
3656   case ISD::SETOGE:
3657   case ISD::SETOLE:
3658   case ISD::SETONE:
3659     llvm_unreachable("Invalid branch code: should be expanded by legalize");
3660   // These are invalid for floating point.  Assume integer.
3661   case ISD::SETULT: return 0;
3662   case ISD::SETUGT: return 1;
3663   }
3664 }
3665 
3666 // getVCmpInst: return the vector compare instruction for the specified
3667 // vector type and condition code. Since this is for altivec specific code,
3668 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
3669 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
3670                                 bool HasVSX, bool &Swap, bool &Negate) {
3671   Swap = false;
3672   Negate = false;
3673 
3674   if (VecVT.isFloatingPoint()) {
3675     /* Handle some cases by swapping input operands.  */
3676     switch (CC) {
3677       case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
3678       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3679       case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
3680       case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
3681       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3682       case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
3683       default: break;
3684     }
3685     /* Handle some cases by negating the result.  */
3686     switch (CC) {
3687       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3688       case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
3689       case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
3690       case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
3691       default: break;
3692     }
3693     /* We have instructions implementing the remaining cases.  */
3694     switch (CC) {
3695       case ISD::SETEQ:
3696       case ISD::SETOEQ:
3697         if (VecVT == MVT::v4f32)
3698           return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
3699         else if (VecVT == MVT::v2f64)
3700           return PPC::XVCMPEQDP;
3701         break;
3702       case ISD::SETGT:
3703       case ISD::SETOGT:
3704         if (VecVT == MVT::v4f32)
3705           return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
3706         else if (VecVT == MVT::v2f64)
3707           return PPC::XVCMPGTDP;
3708         break;
3709       case ISD::SETGE:
3710       case ISD::SETOGE:
3711         if (VecVT == MVT::v4f32)
3712           return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
3713         else if (VecVT == MVT::v2f64)
3714           return PPC::XVCMPGEDP;
3715         break;
3716       default:
3717         break;
3718     }
3719     llvm_unreachable("Invalid floating-point vector compare condition");
3720   } else {
3721     /* Handle some cases by swapping input operands.  */
3722     switch (CC) {
3723       case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
3724       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3725       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3726       case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
3727       default: break;
3728     }
3729     /* Handle some cases by negating the result.  */
3730     switch (CC) {
3731       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3732       case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
3733       case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
3734       case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
3735       default: break;
3736     }
3737     /* We have instructions implementing the remaining cases.  */
3738     switch (CC) {
3739       case ISD::SETEQ:
3740       case ISD::SETUEQ:
3741         if (VecVT == MVT::v16i8)
3742           return PPC::VCMPEQUB;
3743         else if (VecVT == MVT::v8i16)
3744           return PPC::VCMPEQUH;
3745         else if (VecVT == MVT::v4i32)
3746           return PPC::VCMPEQUW;
3747         else if (VecVT == MVT::v2i64)
3748           return PPC::VCMPEQUD;
3749         break;
3750       case ISD::SETGT:
3751         if (VecVT == MVT::v16i8)
3752           return PPC::VCMPGTSB;
3753         else if (VecVT == MVT::v8i16)
3754           return PPC::VCMPGTSH;
3755         else if (VecVT == MVT::v4i32)
3756           return PPC::VCMPGTSW;
3757         else if (VecVT == MVT::v2i64)
3758           return PPC::VCMPGTSD;
3759         break;
3760       case ISD::SETUGT:
3761         if (VecVT == MVT::v16i8)
3762           return PPC::VCMPGTUB;
3763         else if (VecVT == MVT::v8i16)
3764           return PPC::VCMPGTUH;
3765         else if (VecVT == MVT::v4i32)
3766           return PPC::VCMPGTUW;
3767         else if (VecVT == MVT::v2i64)
3768           return PPC::VCMPGTUD;
3769         break;
3770       default:
3771         break;
3772     }
3773     llvm_unreachable("Invalid integer vector compare condition");
3774   }
3775 }
3776 
3777 bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
3778   SDLoc dl(N);
3779   unsigned Imm;
3780   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3781   EVT PtrVT =
3782       CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
3783   bool isPPC64 = (PtrVT == MVT::i64);
3784 
3785   if (!PPCSubTarget->useCRBits() &&
3786       isInt32Immediate(N->getOperand(1), Imm)) {
3787     // We can codegen setcc op, imm very efficiently compared to a brcond.
3788     // Check for those cases here.
3789     // setcc op, 0
3790     if (Imm == 0) {
3791       SDValue Op = N->getOperand(0);
3792       switch (CC) {
3793       default: break;
3794       case ISD::SETEQ: {
3795         Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
3796         SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
3797                           getI32Imm(31, dl) };
3798         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3799         return true;
3800       }
3801       case ISD::SETNE: {
3802         if (isPPC64) break;
3803         SDValue AD =
3804           SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
3805                                          Op, getI32Imm(~0U, dl)), 0);
3806         CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
3807         return true;
3808       }
3809       case ISD::SETLT: {
3810         SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
3811                           getI32Imm(31, dl) };
3812         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3813         return true;
3814       }
3815       case ISD::SETGT: {
3816         SDValue T =
3817           SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
3818         T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
3819         SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
3820                           getI32Imm(31, dl) };
3821         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3822         return true;
3823       }
3824       }
3825     } else if (Imm == ~0U) {        // setcc op, -1
3826       SDValue Op = N->getOperand(0);
3827       switch (CC) {
3828       default: break;
3829       case ISD::SETEQ:
3830         if (isPPC64) break;
3831         Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
3832                                             Op, getI32Imm(1, dl)), 0);
3833         CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
3834                              SDValue(CurDAG->getMachineNode(PPC::LI, dl,
3835                                                             MVT::i32,
3836                                                             getI32Imm(0, dl)),
3837                                      0), Op.getValue(1));
3838         return true;
3839       case ISD::SETNE: {
3840         if (isPPC64) break;
3841         Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
3842         SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
3843                                             Op, getI32Imm(~0U, dl));
3844         CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
3845                              SDValue(AD, 1));
3846         return true;
3847       }
3848       case ISD::SETLT: {
3849         SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
3850                                                     getI32Imm(1, dl)), 0);
3851         SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
3852                                                     Op), 0);
3853         SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
3854                           getI32Imm(31, dl) };
3855         CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3856         return true;
3857       }
3858       case ISD::SETGT: {
3859         SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
3860                           getI32Imm(31, dl) };
3861         Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
3862         CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
3863         return true;
3864       }
3865       }
3866     }
3867   }
3868 
3869   SDValue LHS = N->getOperand(0);
3870   SDValue RHS = N->getOperand(1);
3871 
3872   // Altivec Vector compare instructions do not set any CR register by default and
3873   // vector compare operations return the same type as the operands.
3874   if (LHS.getValueType().isVector()) {
3875     if (PPCSubTarget->hasQPX())
3876       return false;
3877 
3878     EVT VecVT = LHS.getValueType();
3879     bool Swap, Negate;
3880     unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
3881                                         PPCSubTarget->hasVSX(), Swap, Negate);
3882     if (Swap)
3883       std::swap(LHS, RHS);
3884 
3885     EVT ResVT = VecVT.changeVectorElementTypeToInteger();
3886     if (Negate) {
3887       SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
3888       CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
3889                            ResVT, VCmp, VCmp);
3890       return true;
3891     }
3892 
3893     CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
3894     return true;
3895   }
3896 
3897   if (PPCSubTarget->useCRBits())
3898     return false;
3899 
3900   bool Inv;
3901   unsigned Idx = getCRIdxForSetCC(CC, Inv);
3902   SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
3903   SDValue IntCR;
3904 
3905   // Force the ccreg into CR7.
3906   SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
3907 
3908   SDValue InFlag(nullptr, 0);  // Null incoming flag value.
3909   CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
3910                                InFlag).getValue(1);
3911 
3912   IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
3913                                          CCReg), 0);
3914 
3915   SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
3916                       getI32Imm(31, dl), getI32Imm(31, dl) };
3917   if (!Inv) {
3918     CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
3919     return true;
3920   }
3921 
3922   // Get the specified bit.
3923   SDValue Tmp =
3924     SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
3925   CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
3926   return true;
3927 }
3928 
3929 /// Does this node represent a load/store node whose address can be represented
3930 /// with a register plus an immediate that's a multiple of \p Val:
3931 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
3932   LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
3933   StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
3934   SDValue AddrOp;
3935   if (LDN)
3936     AddrOp = LDN->getOperand(1);
3937   else if (STN)
3938     AddrOp = STN->getOperand(2);
3939 
3940   // If the address points a frame object or a frame object with an offset,
3941   // we need to check the object alignment.
3942   short Imm = 0;
3943   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
3944           AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
3945                                            AddrOp)) {
3946     // If op0 is a frame index that is under aligned, we can't do it either,
3947     // because it is translated to r31 or r1 + slot + offset. We won't know the
3948     // slot number until the stack frame is finalized.
3949     const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
3950     unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
3951     if ((SlotAlign % Val) != 0)
3952       return false;
3953 
3954     // If we have an offset, we need further check on the offset.
3955     if (AddrOp.getOpcode() != ISD::ADD)
3956       return true;
3957   }
3958 
3959   if (AddrOp.getOpcode() == ISD::ADD)
3960     return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
3961 
3962   // If the address comes from the outside, the offset will be zero.
3963   return AddrOp.getOpcode() == ISD::CopyFromReg;
3964 }
3965 
3966 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
3967   // Transfer memoperands.
3968   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
3969   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
3970   cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
3971 }
3972 
3973 // Select - Convert the specified operand from a target-independent to a
3974 // target-specific node if it hasn't already been changed.
3975 void PPCDAGToDAGISel::Select(SDNode *N) {
3976   SDLoc dl(N);
3977   if (N->isMachineOpcode()) {
3978     N->setNodeId(-1);
3979     return;   // Already selected.
3980   }
3981 
3982   // In case any misguided DAG-level optimizations form an ADD with a
3983   // TargetConstant operand, crash here instead of miscompiling (by selecting
3984   // an r+r add instead of some kind of r+i add).
3985   if (N->getOpcode() == ISD::ADD &&
3986       N->getOperand(1).getOpcode() == ISD::TargetConstant)
3987     llvm_unreachable("Invalid ADD with TargetConstant operand");
3988 
3989   // Try matching complex bit permutations before doing anything else.
3990   if (tryBitPermutation(N))
3991     return;
3992 
3993   // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
3994   if (tryIntCompareInGPR(N))
3995     return;
3996 
3997   switch (N->getOpcode()) {
3998   default: break;
3999 
4000   case ISD::Constant:
4001     if (N->getValueType(0) == MVT::i64) {
4002       ReplaceNode(N, selectI64Imm(CurDAG, N));
4003       return;
4004     }
4005     break;
4006 
4007   case ISD::SETCC:
4008     if (trySETCC(N))
4009       return;
4010     break;
4011 
4012   case PPCISD::CALL: {
4013     const Module *M = MF->getFunction().getParent();
4014 
4015     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4016         !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
4017         M->getPICLevel() == PICLevel::SmallPIC)
4018       break;
4019 
4020     SDValue Op = N->getOperand(1);
4021 
4022     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4023       if (GA->getTargetFlags() == PPCII::MO_PLT)
4024         getGlobalBaseReg();
4025     }
4026     else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
4027       if (ES->getTargetFlags() == PPCII::MO_PLT)
4028         getGlobalBaseReg();
4029     }
4030   }
4031     break;
4032 
4033   case PPCISD::GlobalBaseReg:
4034     ReplaceNode(N, getGlobalBaseReg());
4035     return;
4036 
4037   case ISD::FrameIndex:
4038     selectFrameIndex(N, N);
4039     return;
4040 
4041   case PPCISD::MFOCRF: {
4042     SDValue InFlag = N->getOperand(1);
4043     ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
4044                                           N->getOperand(0), InFlag));
4045     return;
4046   }
4047 
4048   case PPCISD::READ_TIME_BASE:
4049     ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
4050                                           MVT::Other, N->getOperand(0)));
4051     return;
4052 
4053   case PPCISD::SRA_ADDZE: {
4054     SDValue N0 = N->getOperand(0);
4055     SDValue ShiftAmt =
4056       CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
4057                                   getConstantIntValue(), dl,
4058                                   N->getValueType(0));
4059     if (N->getValueType(0) == MVT::i64) {
4060       SDNode *Op =
4061         CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
4062                                N0, ShiftAmt);
4063       CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
4064                            SDValue(Op, 1));
4065       return;
4066     } else {
4067       assert(N->getValueType(0) == MVT::i32 &&
4068              "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4069       SDNode *Op =
4070         CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
4071                                N0, ShiftAmt);
4072       CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
4073                            SDValue(Op, 1));
4074       return;
4075     }
4076   }
4077 
4078   case ISD::STORE: {
4079     // Change TLS initial-exec D-form stores to X-form stores.
4080     StoreSDNode *ST = cast<StoreSDNode>(N);
4081     if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
4082         ST->getAddressingMode() != ISD::PRE_INC)
4083       if (tryTLSXFormStore(ST))
4084         return;
4085     break;
4086   }
4087   case ISD::LOAD: {
4088     // Handle preincrement loads.
4089     LoadSDNode *LD = cast<LoadSDNode>(N);
4090     EVT LoadedVT = LD->getMemoryVT();
4091 
4092     // Normal loads are handled by code generated from the .td file.
4093     if (LD->getAddressingMode() != ISD::PRE_INC) {
4094       // Change TLS initial-exec D-form loads to X-form loads.
4095       if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
4096         if (tryTLSXFormLoad(LD))
4097           return;
4098       break;
4099     }
4100 
4101     SDValue Offset = LD->getOffset();
4102     if (Offset.getOpcode() == ISD::TargetConstant ||
4103         Offset.getOpcode() == ISD::TargetGlobalAddress) {
4104 
4105       unsigned Opcode;
4106       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4107       if (LD->getValueType(0) != MVT::i64) {
4108         // Handle PPC32 integer and normal FP loads.
4109         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4110         switch (LoadedVT.getSimpleVT().SimpleTy) {
4111           default: llvm_unreachable("Invalid PPC load type!");
4112           case MVT::f64: Opcode = PPC::LFDU; break;
4113           case MVT::f32: Opcode = PPC::LFSU; break;
4114           case MVT::i32: Opcode = PPC::LWZU; break;
4115           case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
4116           case MVT::i1:
4117           case MVT::i8:  Opcode = PPC::LBZU; break;
4118         }
4119       } else {
4120         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4121         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4122         switch (LoadedVT.getSimpleVT().SimpleTy) {
4123           default: llvm_unreachable("Invalid PPC load type!");
4124           case MVT::i64: Opcode = PPC::LDU; break;
4125           case MVT::i32: Opcode = PPC::LWZU8; break;
4126           case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
4127           case MVT::i1:
4128           case MVT::i8:  Opcode = PPC::LBZU8; break;
4129         }
4130       }
4131 
4132       SDValue Chain = LD->getChain();
4133       SDValue Base = LD->getBasePtr();
4134       SDValue Ops[] = { Offset, Base, Chain };
4135       SDNode *MN = CurDAG->getMachineNode(
4136           Opcode, dl, LD->getValueType(0),
4137           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4138       transferMemOperands(N, MN);
4139       ReplaceNode(N, MN);
4140       return;
4141     } else {
4142       unsigned Opcode;
4143       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4144       if (LD->getValueType(0) != MVT::i64) {
4145         // Handle PPC32 integer and normal FP loads.
4146         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4147         switch (LoadedVT.getSimpleVT().SimpleTy) {
4148           default: llvm_unreachable("Invalid PPC load type!");
4149           case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
4150           case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
4151           case MVT::f64: Opcode = PPC::LFDUX; break;
4152           case MVT::f32: Opcode = PPC::LFSUX; break;
4153           case MVT::i32: Opcode = PPC::LWZUX; break;
4154           case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
4155           case MVT::i1:
4156           case MVT::i8:  Opcode = PPC::LBZUX; break;
4157         }
4158       } else {
4159         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4160         assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
4161                "Invalid sext update load");
4162         switch (LoadedVT.getSimpleVT().SimpleTy) {
4163           default: llvm_unreachable("Invalid PPC load type!");
4164           case MVT::i64: Opcode = PPC::LDUX; break;
4165           case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
4166           case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
4167           case MVT::i1:
4168           case MVT::i8:  Opcode = PPC::LBZUX8; break;
4169         }
4170       }
4171 
4172       SDValue Chain = LD->getChain();
4173       SDValue Base = LD->getBasePtr();
4174       SDValue Ops[] = { Base, Offset, Chain };
4175       SDNode *MN = CurDAG->getMachineNode(
4176           Opcode, dl, LD->getValueType(0),
4177           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4178       transferMemOperands(N, MN);
4179       ReplaceNode(N, MN);
4180       return;
4181     }
4182   }
4183 
4184   case ISD::AND: {
4185     unsigned Imm, Imm2, SH, MB, ME;
4186     uint64_t Imm64;
4187 
4188     // If this is an and of a value rotated between 0 and 31 bits and then and'd
4189     // with a mask, emit rlwinm
4190     if (isInt32Immediate(N->getOperand(1), Imm) &&
4191         isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
4192       SDValue Val = N->getOperand(0).getOperand(0);
4193       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4194                         getI32Imm(ME, dl) };
4195       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4196       return;
4197     }
4198     // If this is just a masked value where the input is not handled above, and
4199     // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4200     if (isInt32Immediate(N->getOperand(1), Imm) &&
4201         isRunOfOnes(Imm, MB, ME) &&
4202         N->getOperand(0).getOpcode() != ISD::ROTL) {
4203       SDValue Val = N->getOperand(0);
4204       SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4205                         getI32Imm(ME, dl) };
4206       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4207       return;
4208     }
4209     // If this is a 64-bit zero-extension mask, emit rldicl.
4210     if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4211         isMask_64(Imm64)) {
4212       SDValue Val = N->getOperand(0);
4213       MB = 64 - countTrailingOnes(Imm64);
4214       SH = 0;
4215 
4216       if (Val.getOpcode() == ISD::ANY_EXTEND) {
4217         auto Op0 = Val.getOperand(0);
4218         if ( Op0.getOpcode() == ISD::SRL &&
4219            isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
4220 
4221            auto ResultType = Val.getNode()->getValueType(0);
4222            auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
4223                                                ResultType);
4224            SDValue IDVal (ImDef, 0);
4225 
4226            Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
4227                          ResultType, IDVal, Op0.getOperand(0),
4228                          getI32Imm(1, dl)), 0);
4229            SH = 64 - Imm;
4230         }
4231       }
4232 
4233       // If the operand is a logical right shift, we can fold it into this
4234       // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4235       // for n <= mb. The right shift is really a left rotate followed by a
4236       // mask, and this mask is a more-restrictive sub-mask of the mask implied
4237       // by the shift.
4238       if (Val.getOpcode() == ISD::SRL &&
4239           isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
4240         assert(Imm < 64 && "Illegal shift amount");
4241         Val = Val.getOperand(0);
4242         SH = 64 - Imm;
4243       }
4244 
4245       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4246       CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4247       return;
4248     }
4249     // If this is a negated 64-bit zero-extension mask,
4250     // i.e. the immediate is a sequence of ones from most significant side
4251     // and all zero for reminder, we should use rldicr.
4252     if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4253         isMask_64(~Imm64)) {
4254       SDValue Val = N->getOperand(0);
4255       MB = 63 - countTrailingOnes(~Imm64);
4256       SH = 0;
4257       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4258       CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4259       return;
4260     }
4261 
4262     // AND X, 0 -> 0, not "rlwinm 32".
4263     if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
4264       ReplaceUses(SDValue(N, 0), N->getOperand(1));
4265       return;
4266     }
4267     // ISD::OR doesn't get all the bitfield insertion fun.
4268     // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4269     // bitfield insert.
4270     if (isInt32Immediate(N->getOperand(1), Imm) &&
4271         N->getOperand(0).getOpcode() == ISD::OR &&
4272         isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
4273       // The idea here is to check whether this is equivalent to:
4274       //   (c1 & m) | (x & ~m)
4275       // where m is a run-of-ones mask. The logic here is that, for each bit in
4276       // c1 and c2:
4277       //  - if both are 1, then the output will be 1.
4278       //  - if both are 0, then the output will be 0.
4279       //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4280       //    come from x.
4281       //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4282       //    be 0.
4283       //  If that last condition is never the case, then we can form m from the
4284       //  bits that are the same between c1 and c2.
4285       unsigned MB, ME;
4286       if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
4287         SDValue Ops[] = { N->getOperand(0).getOperand(0),
4288                             N->getOperand(0).getOperand(1),
4289                             getI32Imm(0, dl), getI32Imm(MB, dl),
4290                             getI32Imm(ME, dl) };
4291         ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4292         return;
4293       }
4294     }
4295 
4296     // Other cases are autogenerated.
4297     break;
4298   }
4299   case ISD::OR: {
4300     if (N->getValueType(0) == MVT::i32)
4301       if (tryBitfieldInsert(N))
4302         return;
4303 
4304     int16_t Imm;
4305     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4306         isIntS16Immediate(N->getOperand(1), Imm)) {
4307       KnownBits LHSKnown;
4308       CurDAG->computeKnownBits(N->getOperand(0), LHSKnown);
4309 
4310       // If this is equivalent to an add, then we can fold it with the
4311       // FrameIndex calculation.
4312       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4313         selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4314         return;
4315       }
4316     }
4317 
4318     // OR with a 32-bit immediate can be handled by ori + oris
4319     // without creating an immediate in a GPR.
4320     uint64_t Imm64 = 0;
4321     bool IsPPC64 = PPCSubTarget->isPPC64();
4322     if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4323         (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4324       // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
4325       uint64_t ImmHi = Imm64 >> 16;
4326       uint64_t ImmLo = Imm64 & 0xFFFF;
4327       if (ImmHi != 0 && ImmLo != 0) {
4328         SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4329                                             N->getOperand(0),
4330                                             getI16Imm(ImmLo, dl));
4331         SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4332         CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4333         return;
4334       }
4335     }
4336 
4337     // Other cases are autogenerated.
4338     break;
4339   }
4340   case ISD::XOR: {
4341     // XOR with a 32-bit immediate can be handled by xori + xoris
4342     // without creating an immediate in a GPR.
4343     uint64_t Imm64 = 0;
4344     bool IsPPC64 = PPCSubTarget->isPPC64();
4345     if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4346         (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4347       // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
4348       uint64_t ImmHi = Imm64 >> 16;
4349       uint64_t ImmLo = Imm64 & 0xFFFF;
4350       if (ImmHi != 0 && ImmLo != 0) {
4351         SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4352                                             N->getOperand(0),
4353                                             getI16Imm(ImmLo, dl));
4354         SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4355         CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4356         return;
4357       }
4358     }
4359 
4360     break;
4361   }
4362   case ISD::ADD: {
4363     int16_t Imm;
4364     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4365         isIntS16Immediate(N->getOperand(1), Imm)) {
4366       selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4367       return;
4368     }
4369 
4370     break;
4371   }
4372   case ISD::SHL: {
4373     unsigned Imm, SH, MB, ME;
4374     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4375         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4376       SDValue Ops[] = { N->getOperand(0).getOperand(0),
4377                           getI32Imm(SH, dl), getI32Imm(MB, dl),
4378                           getI32Imm(ME, dl) };
4379       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4380       return;
4381     }
4382 
4383     // Other cases are autogenerated.
4384     break;
4385   }
4386   case ISD::SRL: {
4387     unsigned Imm, SH, MB, ME;
4388     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4389         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4390       SDValue Ops[] = { N->getOperand(0).getOperand(0),
4391                           getI32Imm(SH, dl), getI32Imm(MB, dl),
4392                           getI32Imm(ME, dl) };
4393       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4394       return;
4395     }
4396 
4397     // Other cases are autogenerated.
4398     break;
4399   }
4400   // FIXME: Remove this once the ANDI glue bug is fixed:
4401   case PPCISD::ANDIo_1_EQ_BIT:
4402   case PPCISD::ANDIo_1_GT_BIT: {
4403     if (!ANDIGlueBug)
4404       break;
4405 
4406     EVT InVT = N->getOperand(0).getValueType();
4407     assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4408            "Invalid input type for ANDIo_1_EQ_BIT");
4409 
4410     unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
4411     SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4412                                         N->getOperand(0),
4413                                         CurDAG->getTargetConstant(1, dl, InVT)),
4414                  0);
4415     SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4416     SDValue SRIdxVal =
4417       CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
4418                                 PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
4419 
4420     CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4421                          SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4422     return;
4423   }
4424   case ISD::SELECT_CC: {
4425     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4426     EVT PtrVT =
4427         CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4428     bool isPPC64 = (PtrVT == MVT::i64);
4429 
4430     // If this is a select of i1 operands, we'll pattern match it.
4431     if (PPCSubTarget->useCRBits() &&
4432         N->getOperand(0).getValueType() == MVT::i1)
4433       break;
4434 
4435     // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
4436     if (!isPPC64)
4437       if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
4438         if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
4439           if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
4440             if (N1C->isNullValue() && N3C->isNullValue() &&
4441                 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
4442                 // FIXME: Implement this optzn for PPC64.
4443                 N->getValueType(0) == MVT::i32) {
4444               SDNode *Tmp =
4445                 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4446                                        N->getOperand(0), getI32Imm(~0U, dl));
4447               CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
4448                                    N->getOperand(0), SDValue(Tmp, 1));
4449               return;
4450             }
4451 
4452     SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
4453 
4454     if (N->getValueType(0) == MVT::i1) {
4455       // An i1 select is: (c & t) | (!c & f).
4456       bool Inv;
4457       unsigned Idx = getCRIdxForSetCC(CC, Inv);
4458 
4459       unsigned SRI;
4460       switch (Idx) {
4461       default: llvm_unreachable("Invalid CC index");
4462       case 0: SRI = PPC::sub_lt; break;
4463       case 1: SRI = PPC::sub_gt; break;
4464       case 2: SRI = PPC::sub_eq; break;
4465       case 3: SRI = PPC::sub_un; break;
4466       }
4467 
4468       SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
4469 
4470       SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
4471                                               CCBit, CCBit), 0);
4472       SDValue C =    Inv ? NotCCBit : CCBit,
4473               NotC = Inv ? CCBit    : NotCCBit;
4474 
4475       SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4476                                            C, N->getOperand(2)), 0);
4477       SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4478                                               NotC, N->getOperand(3)), 0);
4479 
4480       CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
4481       return;
4482     }
4483 
4484     unsigned BROpc = getPredicateForSetCC(CC);
4485 
4486     unsigned SelectCCOp;
4487     if (N->getValueType(0) == MVT::i32)
4488       SelectCCOp = PPC::SELECT_CC_I4;
4489     else if (N->getValueType(0) == MVT::i64)
4490       SelectCCOp = PPC::SELECT_CC_I8;
4491     else if (N->getValueType(0) == MVT::f32)
4492       if (PPCSubTarget->hasP8Vector())
4493         SelectCCOp = PPC::SELECT_CC_VSSRC;
4494       else
4495         SelectCCOp = PPC::SELECT_CC_F4;
4496     else if (N->getValueType(0) == MVT::f64)
4497       if (PPCSubTarget->hasVSX())
4498         SelectCCOp = PPC::SELECT_CC_VSFRC;
4499       else
4500         SelectCCOp = PPC::SELECT_CC_F8;
4501     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
4502       SelectCCOp = PPC::SELECT_CC_QFRC;
4503     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
4504       SelectCCOp = PPC::SELECT_CC_QSRC;
4505     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
4506       SelectCCOp = PPC::SELECT_CC_QBRC;
4507     else if (N->getValueType(0) == MVT::v2f64 ||
4508              N->getValueType(0) == MVT::v2i64)
4509       SelectCCOp = PPC::SELECT_CC_VSRC;
4510     else
4511       SelectCCOp = PPC::SELECT_CC_VRRC;
4512 
4513     SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
4514                         getI32Imm(BROpc, dl) };
4515     CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
4516     return;
4517   }
4518   case ISD::VSELECT:
4519     if (PPCSubTarget->hasVSX()) {
4520       SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
4521       CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
4522       return;
4523     }
4524     break;
4525 
4526   case ISD::VECTOR_SHUFFLE:
4527     if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
4528                                   N->getValueType(0) == MVT::v2i64)) {
4529       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4530 
4531       SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
4532               Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
4533       unsigned DM[2];
4534 
4535       for (int i = 0; i < 2; ++i)
4536         if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
4537           DM[i] = 0;
4538         else
4539           DM[i] = 1;
4540 
4541       if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
4542           Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
4543           isa<LoadSDNode>(Op1.getOperand(0))) {
4544         LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
4545         SDValue Base, Offset;
4546 
4547         if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
4548             (LD->getMemoryVT() == MVT::f64 ||
4549              LD->getMemoryVT() == MVT::i64) &&
4550             SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
4551           SDValue Chain = LD->getChain();
4552           SDValue Ops[] = { Base, Offset, Chain };
4553           MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
4554           MemOp[0] = LD->getMemOperand();
4555           SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
4556                                               N->getValueType(0), Ops);
4557           cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1);
4558           return;
4559         }
4560       }
4561 
4562       // For little endian, we must swap the input operands and adjust
4563       // the mask elements (reverse and invert them).
4564       if (PPCSubTarget->isLittleEndian()) {
4565         std::swap(Op1, Op2);
4566         unsigned tmp = DM[0];
4567         DM[0] = 1 - DM[1];
4568         DM[1] = 1 - tmp;
4569       }
4570 
4571       SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
4572                                               MVT::i32);
4573       SDValue Ops[] = { Op1, Op2, DMV };
4574       CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
4575       return;
4576     }
4577 
4578     break;
4579   case PPCISD::BDNZ:
4580   case PPCISD::BDZ: {
4581     bool IsPPC64 = PPCSubTarget->isPPC64();
4582     SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
4583     CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
4584                                 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
4585                                 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
4586                          MVT::Other, Ops);
4587     return;
4588   }
4589   case PPCISD::COND_BRANCH: {
4590     // Op #0 is the Chain.
4591     // Op #1 is the PPC::PRED_* number.
4592     // Op #2 is the CR#
4593     // Op #3 is the Dest MBB
4594     // Op #4 is the Flag.
4595     // Prevent PPC::PRED_* from being selected into LI.
4596     unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
4597     if (EnableBranchHint)
4598       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
4599 
4600     SDValue Pred = getI32Imm(PCC, dl);
4601     SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
4602       N->getOperand(0), N->getOperand(4) };
4603     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4604     return;
4605   }
4606   case ISD::BR_CC: {
4607     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4608     unsigned PCC = getPredicateForSetCC(CC);
4609 
4610     if (N->getOperand(2).getValueType() == MVT::i1) {
4611       unsigned Opc;
4612       bool Swap;
4613       switch (PCC) {
4614       default: llvm_unreachable("Unexpected Boolean-operand predicate");
4615       case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
4616       case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
4617       case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
4618       case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
4619       case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
4620       case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
4621       }
4622 
4623       SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
4624                                              N->getOperand(Swap ? 3 : 2),
4625                                              N->getOperand(Swap ? 2 : 3)), 0);
4626       CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
4627                            N->getOperand(0));
4628       return;
4629     }
4630 
4631     if (EnableBranchHint)
4632       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
4633 
4634     SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
4635     SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
4636                         N->getOperand(4), N->getOperand(0) };
4637     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4638     return;
4639   }
4640   case ISD::BRIND: {
4641     // FIXME: Should custom lower this.
4642     SDValue Chain = N->getOperand(0);
4643     SDValue Target = N->getOperand(1);
4644     unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
4645     unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
4646     Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
4647                                            Chain), 0);
4648     CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
4649     return;
4650   }
4651   case PPCISD::TOC_ENTRY: {
4652     assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
4653             "Only supported for 64-bit ABI and 32-bit SVR4");
4654     if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
4655       SDValue GA = N->getOperand(0);
4656       SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
4657                                           N->getOperand(1));
4658       transferMemOperands(N, MN);
4659       ReplaceNode(N, MN);
4660       return;
4661     }
4662 
4663     // For medium and large code model, we generate two instructions as
4664     // described below.  Otherwise we allow SelectCodeCommon to handle this,
4665     // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
4666     CodeModel::Model CModel = TM.getCodeModel();
4667     if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
4668       break;
4669 
4670     // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
4671     // If it must be toc-referenced according to PPCSubTarget, we generate:
4672     //   LDtocL(@sym, ADDIStocHA(%x2, @sym))
4673     // Otherwise we generate:
4674     //   ADDItocL(ADDIStocHA(%x2, @sym), @sym)
4675     SDValue GA = N->getOperand(0);
4676     SDValue TOCbase = N->getOperand(1);
4677     SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
4678                                          TOCbase, GA);
4679 
4680     if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) ||
4681         CModel == CodeModel::Large) {
4682       SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
4683                                           SDValue(Tmp, 0));
4684       transferMemOperands(N, MN);
4685       ReplaceNode(N, MN);
4686       return;
4687     }
4688 
4689     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
4690       const GlobalValue *GV = G->getGlobal();
4691       unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV);
4692       if (GVFlags & PPCII::MO_NLP_FLAG) {
4693         SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
4694                                             SDValue(Tmp, 0));
4695         transferMemOperands(N, MN);
4696         ReplaceNode(N, MN);
4697         return;
4698       }
4699     }
4700 
4701     ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
4702                                           SDValue(Tmp, 0), GA));
4703     return;
4704   }
4705   case PPCISD::PPC32_PICGOT:
4706     // Generate a PIC-safe GOT reference.
4707     assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
4708       "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
4709     CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
4710                          PPCLowering->getPointerTy(CurDAG->getDataLayout()),
4711                          MVT::i32);
4712     return;
4713 
4714   case PPCISD::VADD_SPLAT: {
4715     // This expands into one of three sequences, depending on whether
4716     // the first operand is odd or even, positive or negative.
4717     assert(isa<ConstantSDNode>(N->getOperand(0)) &&
4718            isa<ConstantSDNode>(N->getOperand(1)) &&
4719            "Invalid operand on VADD_SPLAT!");
4720 
4721     int Elt     = N->getConstantOperandVal(0);
4722     int EltSize = N->getConstantOperandVal(1);
4723     unsigned Opc1, Opc2, Opc3;
4724     EVT VT;
4725 
4726     if (EltSize == 1) {
4727       Opc1 = PPC::VSPLTISB;
4728       Opc2 = PPC::VADDUBM;
4729       Opc3 = PPC::VSUBUBM;
4730       VT = MVT::v16i8;
4731     } else if (EltSize == 2) {
4732       Opc1 = PPC::VSPLTISH;
4733       Opc2 = PPC::VADDUHM;
4734       Opc3 = PPC::VSUBUHM;
4735       VT = MVT::v8i16;
4736     } else {
4737       assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
4738       Opc1 = PPC::VSPLTISW;
4739       Opc2 = PPC::VADDUWM;
4740       Opc3 = PPC::VSUBUWM;
4741       VT = MVT::v4i32;
4742     }
4743 
4744     if ((Elt & 1) == 0) {
4745       // Elt is even, in the range [-32,-18] + [16,30].
4746       //
4747       // Convert: VADD_SPLAT elt, size
4748       // Into:    tmp = VSPLTIS[BHW] elt
4749       //          VADDU[BHW]M tmp, tmp
4750       // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
4751       SDValue EltVal = getI32Imm(Elt >> 1, dl);
4752       SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4753       SDValue TmpVal = SDValue(Tmp, 0);
4754       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
4755       return;
4756     } else if (Elt > 0) {
4757       // Elt is odd and positive, in the range [17,31].
4758       //
4759       // Convert: VADD_SPLAT elt, size
4760       // Into:    tmp1 = VSPLTIS[BHW] elt-16
4761       //          tmp2 = VSPLTIS[BHW] -16
4762       //          VSUBU[BHW]M tmp1, tmp2
4763       SDValue EltVal = getI32Imm(Elt - 16, dl);
4764       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4765       EltVal = getI32Imm(-16, dl);
4766       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4767       ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
4768                                             SDValue(Tmp2, 0)));
4769       return;
4770     } else {
4771       // Elt is odd and negative, in the range [-31,-17].
4772       //
4773       // Convert: VADD_SPLAT elt, size
4774       // Into:    tmp1 = VSPLTIS[BHW] elt+16
4775       //          tmp2 = VSPLTIS[BHW] -16
4776       //          VADDU[BHW]M tmp1, tmp2
4777       SDValue EltVal = getI32Imm(Elt + 16, dl);
4778       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4779       EltVal = getI32Imm(-16, dl);
4780       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
4781       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
4782                                             SDValue(Tmp2, 0)));
4783       return;
4784     }
4785   }
4786   }
4787 
4788   SelectCode(N);
4789 }
4790 
4791 // If the target supports the cmpb instruction, do the idiom recognition here.
4792 // We don't do this as a DAG combine because we don't want to do it as nodes
4793 // are being combined (because we might miss part of the eventual idiom). We
4794 // don't want to do it during instruction selection because we want to reuse
4795 // the logic for lowering the masking operations already part of the
4796 // instruction selector.
4797 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
4798   SDLoc dl(N);
4799 
4800   assert(N->getOpcode() == ISD::OR &&
4801          "Only OR nodes are supported for CMPB");
4802 
4803   SDValue Res;
4804   if (!PPCSubTarget->hasCMPB())
4805     return Res;
4806 
4807   if (N->getValueType(0) != MVT::i32 &&
4808       N->getValueType(0) != MVT::i64)
4809     return Res;
4810 
4811   EVT VT = N->getValueType(0);
4812 
4813   SDValue RHS, LHS;
4814   bool BytesFound[8] = {false, false, false, false, false, false, false, false};
4815   uint64_t Mask = 0, Alt = 0;
4816 
4817   auto IsByteSelectCC = [this](SDValue O, unsigned &b,
4818                                uint64_t &Mask, uint64_t &Alt,
4819                                SDValue &LHS, SDValue &RHS) {
4820     if (O.getOpcode() != ISD::SELECT_CC)
4821       return false;
4822     ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
4823 
4824     if (!isa<ConstantSDNode>(O.getOperand(2)) ||
4825         !isa<ConstantSDNode>(O.getOperand(3)))
4826       return false;
4827 
4828     uint64_t PM = O.getConstantOperandVal(2);
4829     uint64_t PAlt = O.getConstantOperandVal(3);
4830     for (b = 0; b < 8; ++b) {
4831       uint64_t Mask = UINT64_C(0xFF) << (8*b);
4832       if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
4833         break;
4834     }
4835 
4836     if (b == 8)
4837       return false;
4838     Mask |= PM;
4839     Alt  |= PAlt;
4840 
4841     if (!isa<ConstantSDNode>(O.getOperand(1)) ||
4842         O.getConstantOperandVal(1) != 0) {
4843       SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
4844       if (Op0.getOpcode() == ISD::TRUNCATE)
4845         Op0 = Op0.getOperand(0);
4846       if (Op1.getOpcode() == ISD::TRUNCATE)
4847         Op1 = Op1.getOperand(0);
4848 
4849       if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
4850           Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
4851           isa<ConstantSDNode>(Op0.getOperand(1))) {
4852 
4853         unsigned Bits = Op0.getValueSizeInBits();
4854         if (b != Bits/8-1)
4855           return false;
4856         if (Op0.getConstantOperandVal(1) != Bits-8)
4857           return false;
4858 
4859         LHS = Op0.getOperand(0);
4860         RHS = Op1.getOperand(0);
4861         return true;
4862       }
4863 
4864       // When we have small integers (i16 to be specific), the form present
4865       // post-legalization uses SETULT in the SELECT_CC for the
4866       // higher-order byte, depending on the fact that the
4867       // even-higher-order bytes are known to all be zero, for example:
4868       //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
4869       // (so when the second byte is the same, because all higher-order
4870       // bits from bytes 3 and 4 are known to be zero, the result of the
4871       // xor can be at most 255)
4872       if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
4873           isa<ConstantSDNode>(O.getOperand(1))) {
4874 
4875         uint64_t ULim = O.getConstantOperandVal(1);
4876         if (ULim != (UINT64_C(1) << b*8))
4877           return false;
4878 
4879         // Now we need to make sure that the upper bytes are known to be
4880         // zero.
4881         unsigned Bits = Op0.getValueSizeInBits();
4882         if (!CurDAG->MaskedValueIsZero(
4883                 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
4884           return false;
4885 
4886         LHS = Op0.getOperand(0);
4887         RHS = Op0.getOperand(1);
4888         return true;
4889       }
4890 
4891       return false;
4892     }
4893 
4894     if (CC != ISD::SETEQ)
4895       return false;
4896 
4897     SDValue Op = O.getOperand(0);
4898     if (Op.getOpcode() == ISD::AND) {
4899       if (!isa<ConstantSDNode>(Op.getOperand(1)))
4900         return false;
4901       if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
4902         return false;
4903 
4904       SDValue XOR = Op.getOperand(0);
4905       if (XOR.getOpcode() == ISD::TRUNCATE)
4906         XOR = XOR.getOperand(0);
4907       if (XOR.getOpcode() != ISD::XOR)
4908         return false;
4909 
4910       LHS = XOR.getOperand(0);
4911       RHS = XOR.getOperand(1);
4912       return true;
4913     } else if (Op.getOpcode() == ISD::SRL) {
4914       if (!isa<ConstantSDNode>(Op.getOperand(1)))
4915         return false;
4916       unsigned Bits = Op.getValueSizeInBits();
4917       if (b != Bits/8-1)
4918         return false;
4919       if (Op.getConstantOperandVal(1) != Bits-8)
4920         return false;
4921 
4922       SDValue XOR = Op.getOperand(0);
4923       if (XOR.getOpcode() == ISD::TRUNCATE)
4924         XOR = XOR.getOperand(0);
4925       if (XOR.getOpcode() != ISD::XOR)
4926         return false;
4927 
4928       LHS = XOR.getOperand(0);
4929       RHS = XOR.getOperand(1);
4930       return true;
4931     }
4932 
4933     return false;
4934   };
4935 
4936   SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
4937   while (!Queue.empty()) {
4938     SDValue V = Queue.pop_back_val();
4939 
4940     for (const SDValue &O : V.getNode()->ops()) {
4941       unsigned b;
4942       uint64_t M = 0, A = 0;
4943       SDValue OLHS, ORHS;
4944       if (O.getOpcode() == ISD::OR) {
4945         Queue.push_back(O);
4946       } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
4947         if (!LHS) {
4948           LHS = OLHS;
4949           RHS = ORHS;
4950           BytesFound[b] = true;
4951           Mask |= M;
4952           Alt  |= A;
4953         } else if ((LHS == ORHS && RHS == OLHS) ||
4954                    (RHS == ORHS && LHS == OLHS)) {
4955           BytesFound[b] = true;
4956           Mask |= M;
4957           Alt  |= A;
4958         } else {
4959           return Res;
4960         }
4961       } else {
4962         return Res;
4963       }
4964     }
4965   }
4966 
4967   unsigned LastB = 0, BCnt = 0;
4968   for (unsigned i = 0; i < 8; ++i)
4969     if (BytesFound[LastB]) {
4970       ++BCnt;
4971       LastB = i;
4972     }
4973 
4974   if (!LastB || BCnt < 2)
4975     return Res;
4976 
4977   // Because we'll be zero-extending the output anyway if don't have a specific
4978   // value for each input byte (via the Mask), we can 'anyext' the inputs.
4979   if (LHS.getValueType() != VT) {
4980     LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
4981     RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
4982   }
4983 
4984   Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
4985 
4986   bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
4987   if (NonTrivialMask && !Alt) {
4988     // Res = Mask & CMPB
4989     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
4990                           CurDAG->getConstant(Mask, dl, VT));
4991   } else if (Alt) {
4992     // Res = (CMPB & Mask) | (~CMPB & Alt)
4993     // Which, as suggested here:
4994     //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
4995     // can be written as:
4996     // Res = Alt ^ ((Alt ^ Mask) & CMPB)
4997     // useful because the (Alt ^ Mask) can be pre-computed.
4998     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
4999                           CurDAG->getConstant(Mask ^ Alt, dl, VT));
5000     Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
5001                           CurDAG->getConstant(Alt, dl, VT));
5002   }
5003 
5004   return Res;
5005 }
5006 
5007 // When CR bit registers are enabled, an extension of an i1 variable to a i32
5008 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5009 // involves constant materialization of a 0 or a 1 or both. If the result of
5010 // the extension is then operated upon by some operator that can be constant
5011 // folded with a constant 0 or 1, and that constant can be materialized using
5012 // only one instruction (like a zero or one), then we should fold in those
5013 // operations with the select.
5014 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
5015   if (!PPCSubTarget->useCRBits())
5016     return;
5017 
5018   if (N->getOpcode() != ISD::ZERO_EXTEND &&
5019       N->getOpcode() != ISD::SIGN_EXTEND &&
5020       N->getOpcode() != ISD::ANY_EXTEND)
5021     return;
5022 
5023   if (N->getOperand(0).getValueType() != MVT::i1)
5024     return;
5025 
5026   if (!N->hasOneUse())
5027     return;
5028 
5029   SDLoc dl(N);
5030   EVT VT = N->getValueType(0);
5031   SDValue Cond = N->getOperand(0);
5032   SDValue ConstTrue =
5033     CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
5034   SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
5035 
5036   do {
5037     SDNode *User = *N->use_begin();
5038     if (User->getNumOperands() != 2)
5039       break;
5040 
5041     auto TryFold = [this, N, User, dl](SDValue Val) {
5042       SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
5043       SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
5044       SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
5045 
5046       return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
5047                                             User->getValueType(0),
5048                                             O0.getNode(), O1.getNode());
5049     };
5050 
5051     // FIXME: When the semantics of the interaction between select and undef
5052     // are clearly defined, it may turn out to be unnecessary to break here.
5053     SDValue TrueRes = TryFold(ConstTrue);
5054     if (!TrueRes || TrueRes.isUndef())
5055       break;
5056     SDValue FalseRes = TryFold(ConstFalse);
5057     if (!FalseRes || FalseRes.isUndef())
5058       break;
5059 
5060     // For us to materialize these using one instruction, we must be able to
5061     // represent them as signed 16-bit integers.
5062     uint64_t True  = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
5063              False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
5064     if (!isInt<16>(True) || !isInt<16>(False))
5065       break;
5066 
5067     // We can replace User with a new SELECT node, and try again to see if we
5068     // can fold the select with its user.
5069     Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
5070     N = User;
5071     ConstTrue = TrueRes;
5072     ConstFalse = FalseRes;
5073   } while (N->hasOneUse());
5074 }
5075 
5076 void PPCDAGToDAGISel::PreprocessISelDAG() {
5077   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
5078   ++Position;
5079 
5080   bool MadeChange = false;
5081   while (Position != CurDAG->allnodes_begin()) {
5082     SDNode *N = &*--Position;
5083     if (N->use_empty())
5084       continue;
5085 
5086     SDValue Res;
5087     switch (N->getOpcode()) {
5088     default: break;
5089     case ISD::OR:
5090       Res = combineToCMPB(N);
5091       break;
5092     }
5093 
5094     if (!Res)
5095       foldBoolExts(Res, N);
5096 
5097     if (Res) {
5098       DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
5099       DEBUG(N->dump(CurDAG));
5100       DEBUG(dbgs() << "\nNew: ");
5101       DEBUG(Res.getNode()->dump(CurDAG));
5102       DEBUG(dbgs() << "\n");
5103 
5104       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
5105       MadeChange = true;
5106     }
5107   }
5108 
5109   if (MadeChange)
5110     CurDAG->RemoveDeadNodes();
5111 }
5112 
5113 /// PostprocessISelDAG - Perform some late peephole optimizations
5114 /// on the DAG representation.
5115 void PPCDAGToDAGISel::PostprocessISelDAG() {
5116   // Skip peepholes at -O0.
5117   if (TM.getOptLevel() == CodeGenOpt::None)
5118     return;
5119 
5120   PeepholePPC64();
5121   PeepholeCROps();
5122   PeepholePPC64ZExt();
5123 }
5124 
5125 // Check if all users of this node will become isel where the second operand
5126 // is the constant zero. If this is so, and if we can negate the condition,
5127 // then we can flip the true and false operands. This will allow the zero to
5128 // be folded with the isel so that we don't need to materialize a register
5129 // containing zero.
5130 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
5131   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5132        UI != UE; ++UI) {
5133     SDNode *User = *UI;
5134     if (!User->isMachineOpcode())
5135       return false;
5136     if (User->getMachineOpcode() != PPC::SELECT_I4 &&
5137         User->getMachineOpcode() != PPC::SELECT_I8)
5138       return false;
5139 
5140     SDNode *Op2 = User->getOperand(2).getNode();
5141     if (!Op2->isMachineOpcode())
5142       return false;
5143 
5144     if (Op2->getMachineOpcode() != PPC::LI &&
5145         Op2->getMachineOpcode() != PPC::LI8)
5146       return false;
5147 
5148     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
5149     if (!C)
5150       return false;
5151 
5152     if (!C->isNullValue())
5153       return false;
5154   }
5155 
5156   return true;
5157 }
5158 
5159 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
5160   SmallVector<SDNode *, 4> ToReplace;
5161   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5162        UI != UE; ++UI) {
5163     SDNode *User = *UI;
5164     assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
5165             User->getMachineOpcode() == PPC::SELECT_I8) &&
5166            "Must have all select users");
5167     ToReplace.push_back(User);
5168   }
5169 
5170   for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
5171        UE = ToReplace.end(); UI != UE; ++UI) {
5172     SDNode *User = *UI;
5173     SDNode *ResNode =
5174       CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
5175                              User->getValueType(0), User->getOperand(0),
5176                              User->getOperand(2),
5177                              User->getOperand(1));
5178 
5179       DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
5180       DEBUG(User->dump(CurDAG));
5181       DEBUG(dbgs() << "\nNew: ");
5182       DEBUG(ResNode->dump(CurDAG));
5183       DEBUG(dbgs() << "\n");
5184 
5185       ReplaceUses(User, ResNode);
5186   }
5187 }
5188 
5189 void PPCDAGToDAGISel::PeepholeCROps() {
5190   bool IsModified;
5191   do {
5192     IsModified = false;
5193     for (SDNode &Node : CurDAG->allnodes()) {
5194       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
5195       if (!MachineNode || MachineNode->use_empty())
5196         continue;
5197       SDNode *ResNode = MachineNode;
5198 
5199       bool Op1Set   = false, Op1Unset = false,
5200            Op1Not   = false,
5201            Op2Set   = false, Op2Unset = false,
5202            Op2Not   = false;
5203 
5204       unsigned Opcode = MachineNode->getMachineOpcode();
5205       switch (Opcode) {
5206       default: break;
5207       case PPC::CRAND:
5208       case PPC::CRNAND:
5209       case PPC::CROR:
5210       case PPC::CRXOR:
5211       case PPC::CRNOR:
5212       case PPC::CREQV:
5213       case PPC::CRANDC:
5214       case PPC::CRORC: {
5215         SDValue Op = MachineNode->getOperand(1);
5216         if (Op.isMachineOpcode()) {
5217           if (Op.getMachineOpcode() == PPC::CRSET)
5218             Op2Set = true;
5219           else if (Op.getMachineOpcode() == PPC::CRUNSET)
5220             Op2Unset = true;
5221           else if (Op.getMachineOpcode() == PPC::CRNOR &&
5222                    Op.getOperand(0) == Op.getOperand(1))
5223             Op2Not = true;
5224         }
5225         LLVM_FALLTHROUGH;
5226       }
5227       case PPC::BC:
5228       case PPC::BCn:
5229       case PPC::SELECT_I4:
5230       case PPC::SELECT_I8:
5231       case PPC::SELECT_F4:
5232       case PPC::SELECT_F8:
5233       case PPC::SELECT_QFRC:
5234       case PPC::SELECT_QSRC:
5235       case PPC::SELECT_QBRC:
5236       case PPC::SELECT_VRRC:
5237       case PPC::SELECT_VSFRC:
5238       case PPC::SELECT_VSSRC:
5239       case PPC::SELECT_VSRC: {
5240         SDValue Op = MachineNode->getOperand(0);
5241         if (Op.isMachineOpcode()) {
5242           if (Op.getMachineOpcode() == PPC::CRSET)
5243             Op1Set = true;
5244           else if (Op.getMachineOpcode() == PPC::CRUNSET)
5245             Op1Unset = true;
5246           else if (Op.getMachineOpcode() == PPC::CRNOR &&
5247                    Op.getOperand(0) == Op.getOperand(1))
5248             Op1Not = true;
5249         }
5250         }
5251         break;
5252       }
5253 
5254       bool SelectSwap = false;
5255       switch (Opcode) {
5256       default: break;
5257       case PPC::CRAND:
5258         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5259           // x & x = x
5260           ResNode = MachineNode->getOperand(0).getNode();
5261         else if (Op1Set)
5262           // 1 & y = y
5263           ResNode = MachineNode->getOperand(1).getNode();
5264         else if (Op2Set)
5265           // x & 1 = x
5266           ResNode = MachineNode->getOperand(0).getNode();
5267         else if (Op1Unset || Op2Unset)
5268           // x & 0 = 0 & y = 0
5269           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5270                                            MVT::i1);
5271         else if (Op1Not)
5272           // ~x & y = andc(y, x)
5273           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5274                                            MVT::i1, MachineNode->getOperand(1),
5275                                            MachineNode->getOperand(0).
5276                                              getOperand(0));
5277         else if (Op2Not)
5278           // x & ~y = andc(x, y)
5279           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5280                                            MVT::i1, MachineNode->getOperand(0),
5281                                            MachineNode->getOperand(1).
5282                                              getOperand(0));
5283         else if (AllUsersSelectZero(MachineNode)) {
5284           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5285                                            MVT::i1, MachineNode->getOperand(0),
5286                                            MachineNode->getOperand(1));
5287           SelectSwap = true;
5288         }
5289         break;
5290       case PPC::CRNAND:
5291         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5292           // nand(x, x) -> nor(x, x)
5293           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5294                                            MVT::i1, MachineNode->getOperand(0),
5295                                            MachineNode->getOperand(0));
5296         else if (Op1Set)
5297           // nand(1, y) -> nor(y, y)
5298           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5299                                            MVT::i1, MachineNode->getOperand(1),
5300                                            MachineNode->getOperand(1));
5301         else if (Op2Set)
5302           // nand(x, 1) -> nor(x, x)
5303           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5304                                            MVT::i1, MachineNode->getOperand(0),
5305                                            MachineNode->getOperand(0));
5306         else if (Op1Unset || Op2Unset)
5307           // nand(x, 0) = nand(0, y) = 1
5308           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5309                                            MVT::i1);
5310         else if (Op1Not)
5311           // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5312           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5313                                            MVT::i1, MachineNode->getOperand(0).
5314                                                       getOperand(0),
5315                                            MachineNode->getOperand(1));
5316         else if (Op2Not)
5317           // nand(x, ~y) = ~x | y = orc(y, x)
5318           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5319                                            MVT::i1, MachineNode->getOperand(1).
5320                                                       getOperand(0),
5321                                            MachineNode->getOperand(0));
5322         else if (AllUsersSelectZero(MachineNode)) {
5323           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5324                                            MVT::i1, MachineNode->getOperand(0),
5325                                            MachineNode->getOperand(1));
5326           SelectSwap = true;
5327         }
5328         break;
5329       case PPC::CROR:
5330         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5331           // x | x = x
5332           ResNode = MachineNode->getOperand(0).getNode();
5333         else if (Op1Set || Op2Set)
5334           // x | 1 = 1 | y = 1
5335           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5336                                            MVT::i1);
5337         else if (Op1Unset)
5338           // 0 | y = y
5339           ResNode = MachineNode->getOperand(1).getNode();
5340         else if (Op2Unset)
5341           // x | 0 = x
5342           ResNode = MachineNode->getOperand(0).getNode();
5343         else if (Op1Not)
5344           // ~x | y = orc(y, x)
5345           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5346                                            MVT::i1, MachineNode->getOperand(1),
5347                                            MachineNode->getOperand(0).
5348                                              getOperand(0));
5349         else if (Op2Not)
5350           // x | ~y = orc(x, y)
5351           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5352                                            MVT::i1, MachineNode->getOperand(0),
5353                                            MachineNode->getOperand(1).
5354                                              getOperand(0));
5355         else if (AllUsersSelectZero(MachineNode)) {
5356           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5357                                            MVT::i1, MachineNode->getOperand(0),
5358                                            MachineNode->getOperand(1));
5359           SelectSwap = true;
5360         }
5361         break;
5362       case PPC::CRXOR:
5363         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5364           // xor(x, x) = 0
5365           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5366                                            MVT::i1);
5367         else if (Op1Set)
5368           // xor(1, y) -> nor(y, y)
5369           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5370                                            MVT::i1, MachineNode->getOperand(1),
5371                                            MachineNode->getOperand(1));
5372         else if (Op2Set)
5373           // xor(x, 1) -> nor(x, x)
5374           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5375                                            MVT::i1, MachineNode->getOperand(0),
5376                                            MachineNode->getOperand(0));
5377         else if (Op1Unset)
5378           // xor(0, y) = y
5379           ResNode = MachineNode->getOperand(1).getNode();
5380         else if (Op2Unset)
5381           // xor(x, 0) = x
5382           ResNode = MachineNode->getOperand(0).getNode();
5383         else if (Op1Not)
5384           // xor(~x, y) = eqv(x, y)
5385           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5386                                            MVT::i1, MachineNode->getOperand(0).
5387                                                       getOperand(0),
5388                                            MachineNode->getOperand(1));
5389         else if (Op2Not)
5390           // xor(x, ~y) = eqv(x, y)
5391           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5392                                            MVT::i1, MachineNode->getOperand(0),
5393                                            MachineNode->getOperand(1).
5394                                              getOperand(0));
5395         else if (AllUsersSelectZero(MachineNode)) {
5396           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5397                                            MVT::i1, MachineNode->getOperand(0),
5398                                            MachineNode->getOperand(1));
5399           SelectSwap = true;
5400         }
5401         break;
5402       case PPC::CRNOR:
5403         if (Op1Set || Op2Set)
5404           // nor(1, y) -> 0
5405           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5406                                            MVT::i1);
5407         else if (Op1Unset)
5408           // nor(0, y) = ~y -> nor(y, y)
5409           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5410                                            MVT::i1, MachineNode->getOperand(1),
5411                                            MachineNode->getOperand(1));
5412         else if (Op2Unset)
5413           // nor(x, 0) = ~x
5414           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5415                                            MVT::i1, MachineNode->getOperand(0),
5416                                            MachineNode->getOperand(0));
5417         else if (Op1Not)
5418           // nor(~x, y) = andc(x, y)
5419           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5420                                            MVT::i1, MachineNode->getOperand(0).
5421                                                       getOperand(0),
5422                                            MachineNode->getOperand(1));
5423         else if (Op2Not)
5424           // nor(x, ~y) = andc(y, x)
5425           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5426                                            MVT::i1, MachineNode->getOperand(1).
5427                                                       getOperand(0),
5428                                            MachineNode->getOperand(0));
5429         else if (AllUsersSelectZero(MachineNode)) {
5430           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5431                                            MVT::i1, MachineNode->getOperand(0),
5432                                            MachineNode->getOperand(1));
5433           SelectSwap = true;
5434         }
5435         break;
5436       case PPC::CREQV:
5437         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5438           // eqv(x, x) = 1
5439           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5440                                            MVT::i1);
5441         else if (Op1Set)
5442           // eqv(1, y) = y
5443           ResNode = MachineNode->getOperand(1).getNode();
5444         else if (Op2Set)
5445           // eqv(x, 1) = x
5446           ResNode = MachineNode->getOperand(0).getNode();
5447         else if (Op1Unset)
5448           // eqv(0, y) = ~y -> nor(y, y)
5449           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5450                                            MVT::i1, MachineNode->getOperand(1),
5451                                            MachineNode->getOperand(1));
5452         else if (Op2Unset)
5453           // eqv(x, 0) = ~x
5454           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5455                                            MVT::i1, MachineNode->getOperand(0),
5456                                            MachineNode->getOperand(0));
5457         else if (Op1Not)
5458           // eqv(~x, y) = xor(x, y)
5459           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5460                                            MVT::i1, MachineNode->getOperand(0).
5461                                                       getOperand(0),
5462                                            MachineNode->getOperand(1));
5463         else if (Op2Not)
5464           // eqv(x, ~y) = xor(x, y)
5465           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5466                                            MVT::i1, MachineNode->getOperand(0),
5467                                            MachineNode->getOperand(1).
5468                                              getOperand(0));
5469         else if (AllUsersSelectZero(MachineNode)) {
5470           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5471                                            MVT::i1, MachineNode->getOperand(0),
5472                                            MachineNode->getOperand(1));
5473           SelectSwap = true;
5474         }
5475         break;
5476       case PPC::CRANDC:
5477         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5478           // andc(x, x) = 0
5479           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5480                                            MVT::i1);
5481         else if (Op1Set)
5482           // andc(1, y) = ~y
5483           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5484                                            MVT::i1, MachineNode->getOperand(1),
5485                                            MachineNode->getOperand(1));
5486         else if (Op1Unset || Op2Set)
5487           // andc(0, y) = andc(x, 1) = 0
5488           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5489                                            MVT::i1);
5490         else if (Op2Unset)
5491           // andc(x, 0) = x
5492           ResNode = MachineNode->getOperand(0).getNode();
5493         else if (Op1Not)
5494           // andc(~x, y) = ~(x | y) = nor(x, y)
5495           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5496                                            MVT::i1, MachineNode->getOperand(0).
5497                                                       getOperand(0),
5498                                            MachineNode->getOperand(1));
5499         else if (Op2Not)
5500           // andc(x, ~y) = x & y
5501           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5502                                            MVT::i1, MachineNode->getOperand(0),
5503                                            MachineNode->getOperand(1).
5504                                              getOperand(0));
5505         else if (AllUsersSelectZero(MachineNode)) {
5506           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5507                                            MVT::i1, MachineNode->getOperand(1),
5508                                            MachineNode->getOperand(0));
5509           SelectSwap = true;
5510         }
5511         break;
5512       case PPC::CRORC:
5513         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5514           // orc(x, x) = 1
5515           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5516                                            MVT::i1);
5517         else if (Op1Set || Op2Unset)
5518           // orc(1, y) = orc(x, 0) = 1
5519           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5520                                            MVT::i1);
5521         else if (Op2Set)
5522           // orc(x, 1) = x
5523           ResNode = MachineNode->getOperand(0).getNode();
5524         else if (Op1Unset)
5525           // orc(0, y) = ~y
5526           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5527                                            MVT::i1, MachineNode->getOperand(1),
5528                                            MachineNode->getOperand(1));
5529         else if (Op1Not)
5530           // orc(~x, y) = ~(x & y) = nand(x, y)
5531           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5532                                            MVT::i1, MachineNode->getOperand(0).
5533                                                       getOperand(0),
5534                                            MachineNode->getOperand(1));
5535         else if (Op2Not)
5536           // orc(x, ~y) = x | y
5537           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5538                                            MVT::i1, MachineNode->getOperand(0),
5539                                            MachineNode->getOperand(1).
5540                                              getOperand(0));
5541         else if (AllUsersSelectZero(MachineNode)) {
5542           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5543                                            MVT::i1, MachineNode->getOperand(1),
5544                                            MachineNode->getOperand(0));
5545           SelectSwap = true;
5546         }
5547         break;
5548       case PPC::SELECT_I4:
5549       case PPC::SELECT_I8:
5550       case PPC::SELECT_F4:
5551       case PPC::SELECT_F8:
5552       case PPC::SELECT_QFRC:
5553       case PPC::SELECT_QSRC:
5554       case PPC::SELECT_QBRC:
5555       case PPC::SELECT_VRRC:
5556       case PPC::SELECT_VSFRC:
5557       case PPC::SELECT_VSSRC:
5558       case PPC::SELECT_VSRC:
5559         if (Op1Set)
5560           ResNode = MachineNode->getOperand(1).getNode();
5561         else if (Op1Unset)
5562           ResNode = MachineNode->getOperand(2).getNode();
5563         else if (Op1Not)
5564           ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
5565                                            SDLoc(MachineNode),
5566                                            MachineNode->getValueType(0),
5567                                            MachineNode->getOperand(0).
5568                                              getOperand(0),
5569                                            MachineNode->getOperand(2),
5570                                            MachineNode->getOperand(1));
5571         break;
5572       case PPC::BC:
5573       case PPC::BCn:
5574         if (Op1Not)
5575           ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
5576                                                                PPC::BC,
5577                                            SDLoc(MachineNode),
5578                                            MVT::Other,
5579                                            MachineNode->getOperand(0).
5580                                              getOperand(0),
5581                                            MachineNode->getOperand(1),
5582                                            MachineNode->getOperand(2));
5583         // FIXME: Handle Op1Set, Op1Unset here too.
5584         break;
5585       }
5586 
5587       // If we're inverting this node because it is used only by selects that
5588       // we'd like to swap, then swap the selects before the node replacement.
5589       if (SelectSwap)
5590         SwapAllSelectUsers(MachineNode);
5591 
5592       if (ResNode != MachineNode) {
5593         DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
5594         DEBUG(MachineNode->dump(CurDAG));
5595         DEBUG(dbgs() << "\nNew: ");
5596         DEBUG(ResNode->dump(CurDAG));
5597         DEBUG(dbgs() << "\n");
5598 
5599         ReplaceUses(MachineNode, ResNode);
5600         IsModified = true;
5601       }
5602     }
5603     if (IsModified)
5604       CurDAG->RemoveDeadNodes();
5605   } while (IsModified);
5606 }
5607 
5608 // Gather the set of 32-bit operations that are known to have their
5609 // higher-order 32 bits zero, where ToPromote contains all such operations.
5610 static bool PeepholePPC64ZExtGather(SDValue Op32,
5611                                     SmallPtrSetImpl<SDNode *> &ToPromote) {
5612   if (!Op32.isMachineOpcode())
5613     return false;
5614 
5615   // First, check for the "frontier" instructions (those that will clear the
5616   // higher-order 32 bits.
5617 
5618   // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
5619   // around. If it does not, then these instructions will clear the
5620   // higher-order bits.
5621   if ((Op32.getMachineOpcode() == PPC::RLWINM ||
5622        Op32.getMachineOpcode() == PPC::RLWNM) &&
5623       Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
5624     ToPromote.insert(Op32.getNode());
5625     return true;
5626   }
5627 
5628   // SLW and SRW always clear the higher-order bits.
5629   if (Op32.getMachineOpcode() == PPC::SLW ||
5630       Op32.getMachineOpcode() == PPC::SRW) {
5631     ToPromote.insert(Op32.getNode());
5632     return true;
5633   }
5634 
5635   // For LI and LIS, we need the immediate to be positive (so that it is not
5636   // sign extended).
5637   if (Op32.getMachineOpcode() == PPC::LI ||
5638       Op32.getMachineOpcode() == PPC::LIS) {
5639     if (!isUInt<15>(Op32.getConstantOperandVal(0)))
5640       return false;
5641 
5642     ToPromote.insert(Op32.getNode());
5643     return true;
5644   }
5645 
5646   // LHBRX and LWBRX always clear the higher-order bits.
5647   if (Op32.getMachineOpcode() == PPC::LHBRX ||
5648       Op32.getMachineOpcode() == PPC::LWBRX) {
5649     ToPromote.insert(Op32.getNode());
5650     return true;
5651   }
5652 
5653   // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
5654   if (Op32.getMachineOpcode() == PPC::CNTLZW ||
5655       Op32.getMachineOpcode() == PPC::CNTTZW) {
5656     ToPromote.insert(Op32.getNode());
5657     return true;
5658   }
5659 
5660   // Next, check for those instructions we can look through.
5661 
5662   // Assuming the mask does not wrap around, then the higher-order bits are
5663   // taken directly from the first operand.
5664   if (Op32.getMachineOpcode() == PPC::RLWIMI &&
5665       Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
5666     SmallPtrSet<SDNode *, 16> ToPromote1;
5667     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
5668       return false;
5669 
5670     ToPromote.insert(Op32.getNode());
5671     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5672     return true;
5673   }
5674 
5675   // For OR, the higher-order bits are zero if that is true for both operands.
5676   // For SELECT_I4, the same is true (but the relevant operand numbers are
5677   // shifted by 1).
5678   if (Op32.getMachineOpcode() == PPC::OR ||
5679       Op32.getMachineOpcode() == PPC::SELECT_I4) {
5680     unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
5681     SmallPtrSet<SDNode *, 16> ToPromote1;
5682     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
5683       return false;
5684     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
5685       return false;
5686 
5687     ToPromote.insert(Op32.getNode());
5688     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5689     return true;
5690   }
5691 
5692   // For ORI and ORIS, we need the higher-order bits of the first operand to be
5693   // zero, and also for the constant to be positive (so that it is not sign
5694   // extended).
5695   if (Op32.getMachineOpcode() == PPC::ORI ||
5696       Op32.getMachineOpcode() == PPC::ORIS) {
5697     SmallPtrSet<SDNode *, 16> ToPromote1;
5698     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
5699       return false;
5700     if (!isUInt<15>(Op32.getConstantOperandVal(1)))
5701       return false;
5702 
5703     ToPromote.insert(Op32.getNode());
5704     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5705     return true;
5706   }
5707 
5708   // The higher-order bits of AND are zero if that is true for at least one of
5709   // the operands.
5710   if (Op32.getMachineOpcode() == PPC::AND) {
5711     SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
5712     bool Op0OK =
5713       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
5714     bool Op1OK =
5715       PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
5716     if (!Op0OK && !Op1OK)
5717       return false;
5718 
5719     ToPromote.insert(Op32.getNode());
5720 
5721     if (Op0OK)
5722       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5723 
5724     if (Op1OK)
5725       ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
5726 
5727     return true;
5728   }
5729 
5730   // For ANDI and ANDIS, the higher-order bits are zero if either that is true
5731   // of the first operand, or if the second operand is positive (so that it is
5732   // not sign extended).
5733   if (Op32.getMachineOpcode() == PPC::ANDIo ||
5734       Op32.getMachineOpcode() == PPC::ANDISo) {
5735     SmallPtrSet<SDNode *, 16> ToPromote1;
5736     bool Op0OK =
5737       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
5738     bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
5739     if (!Op0OK && !Op1OK)
5740       return false;
5741 
5742     ToPromote.insert(Op32.getNode());
5743 
5744     if (Op0OK)
5745       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
5746 
5747     return true;
5748   }
5749 
5750   return false;
5751 }
5752 
5753 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
5754   if (!PPCSubTarget->isPPC64())
5755     return;
5756 
5757   // When we zero-extend from i32 to i64, we use a pattern like this:
5758   // def : Pat<(i64 (zext i32:$in)),
5759   //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
5760   //                   0, 32)>;
5761   // There are several 32-bit shift/rotate instructions, however, that will
5762   // clear the higher-order bits of their output, rendering the RLDICL
5763   // unnecessary. When that happens, we remove it here, and redefine the
5764   // relevant 32-bit operation to be a 64-bit operation.
5765 
5766   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
5767   ++Position;
5768 
5769   bool MadeChange = false;
5770   while (Position != CurDAG->allnodes_begin()) {
5771     SDNode *N = &*--Position;
5772     // Skip dead nodes and any non-machine opcodes.
5773     if (N->use_empty() || !N->isMachineOpcode())
5774       continue;
5775 
5776     if (N->getMachineOpcode() != PPC::RLDICL)
5777       continue;
5778 
5779     if (N->getConstantOperandVal(1) != 0 ||
5780         N->getConstantOperandVal(2) != 32)
5781       continue;
5782 
5783     SDValue ISR = N->getOperand(0);
5784     if (!ISR.isMachineOpcode() ||
5785         ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
5786       continue;
5787 
5788     if (!ISR.hasOneUse())
5789       continue;
5790 
5791     if (ISR.getConstantOperandVal(2) != PPC::sub_32)
5792       continue;
5793 
5794     SDValue IDef = ISR.getOperand(0);
5795     if (!IDef.isMachineOpcode() ||
5796         IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
5797       continue;
5798 
5799     // We now know that we're looking at a canonical i32 -> i64 zext. See if we
5800     // can get rid of it.
5801 
5802     SDValue Op32 = ISR->getOperand(1);
5803     if (!Op32.isMachineOpcode())
5804       continue;
5805 
5806     // There are some 32-bit instructions that always clear the high-order 32
5807     // bits, there are also some instructions (like AND) that we can look
5808     // through.
5809     SmallPtrSet<SDNode *, 16> ToPromote;
5810     if (!PeepholePPC64ZExtGather(Op32, ToPromote))
5811       continue;
5812 
5813     // If the ToPromote set contains nodes that have uses outside of the set
5814     // (except for the original INSERT_SUBREG), then abort the transformation.
5815     bool OutsideUse = false;
5816     for (SDNode *PN : ToPromote) {
5817       for (SDNode *UN : PN->uses()) {
5818         if (!ToPromote.count(UN) && UN != ISR.getNode()) {
5819           OutsideUse = true;
5820           break;
5821         }
5822       }
5823 
5824       if (OutsideUse)
5825         break;
5826     }
5827     if (OutsideUse)
5828       continue;
5829 
5830     MadeChange = true;
5831 
5832     // We now know that this zero extension can be removed by promoting to
5833     // nodes in ToPromote to 64-bit operations, where for operations in the
5834     // frontier of the set, we need to insert INSERT_SUBREGs for their
5835     // operands.
5836     for (SDNode *PN : ToPromote) {
5837       unsigned NewOpcode;
5838       switch (PN->getMachineOpcode()) {
5839       default:
5840         llvm_unreachable("Don't know the 64-bit variant of this instruction");
5841       case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
5842       case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
5843       case PPC::SLW:       NewOpcode = PPC::SLW8; break;
5844       case PPC::SRW:       NewOpcode = PPC::SRW8; break;
5845       case PPC::LI:        NewOpcode = PPC::LI8; break;
5846       case PPC::LIS:       NewOpcode = PPC::LIS8; break;
5847       case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
5848       case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
5849       case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
5850       case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
5851       case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
5852       case PPC::OR:        NewOpcode = PPC::OR8; break;
5853       case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
5854       case PPC::ORI:       NewOpcode = PPC::ORI8; break;
5855       case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
5856       case PPC::AND:       NewOpcode = PPC::AND8; break;
5857       case PPC::ANDIo:     NewOpcode = PPC::ANDIo8; break;
5858       case PPC::ANDISo:    NewOpcode = PPC::ANDISo8; break;
5859       }
5860 
5861       // Note: During the replacement process, the nodes will be in an
5862       // inconsistent state (some instructions will have operands with values
5863       // of the wrong type). Once done, however, everything should be right
5864       // again.
5865 
5866       SmallVector<SDValue, 4> Ops;
5867       for (const SDValue &V : PN->ops()) {
5868         if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
5869             !isa<ConstantSDNode>(V)) {
5870           SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
5871           SDNode *ReplOp =
5872             CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
5873                                    ISR.getNode()->getVTList(), ReplOpOps);
5874           Ops.push_back(SDValue(ReplOp, 0));
5875         } else {
5876           Ops.push_back(V);
5877         }
5878       }
5879 
5880       // Because all to-be-promoted nodes only have users that are other
5881       // promoted nodes (or the original INSERT_SUBREG), we can safely replace
5882       // the i32 result value type with i64.
5883 
5884       SmallVector<EVT, 2> NewVTs;
5885       SDVTList VTs = PN->getVTList();
5886       for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
5887         if (VTs.VTs[i] == MVT::i32)
5888           NewVTs.push_back(MVT::i64);
5889         else
5890           NewVTs.push_back(VTs.VTs[i]);
5891 
5892       DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
5893       DEBUG(PN->dump(CurDAG));
5894 
5895       CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
5896 
5897       DEBUG(dbgs() << "\nNew: ");
5898       DEBUG(PN->dump(CurDAG));
5899       DEBUG(dbgs() << "\n");
5900     }
5901 
5902     // Now we replace the original zero extend and its associated INSERT_SUBREG
5903     // with the value feeding the INSERT_SUBREG (which has now been promoted to
5904     // return an i64).
5905 
5906     DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
5907     DEBUG(N->dump(CurDAG));
5908     DEBUG(dbgs() << "\nNew: ");
5909     DEBUG(Op32.getNode()->dump(CurDAG));
5910     DEBUG(dbgs() << "\n");
5911 
5912     ReplaceUses(N, Op32.getNode());
5913   }
5914 
5915   if (MadeChange)
5916     CurDAG->RemoveDeadNodes();
5917 }
5918 
5919 void PPCDAGToDAGISel::PeepholePPC64() {
5920   // These optimizations are currently supported only for 64-bit SVR4.
5921   if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
5922     return;
5923 
5924   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
5925   ++Position;
5926 
5927   while (Position != CurDAG->allnodes_begin()) {
5928     SDNode *N = &*--Position;
5929     // Skip dead nodes and any non-machine opcodes.
5930     if (N->use_empty() || !N->isMachineOpcode())
5931       continue;
5932 
5933     unsigned FirstOp;
5934     unsigned StorageOpcode = N->getMachineOpcode();
5935 
5936     switch (StorageOpcode) {
5937     default: continue;
5938 
5939     case PPC::LBZ:
5940     case PPC::LBZ8:
5941     case PPC::LD:
5942     case PPC::LFD:
5943     case PPC::LFS:
5944     case PPC::LHA:
5945     case PPC::LHA8:
5946     case PPC::LHZ:
5947     case PPC::LHZ8:
5948     case PPC::LWA:
5949     case PPC::LWZ:
5950     case PPC::LWZ8:
5951       FirstOp = 0;
5952       break;
5953 
5954     case PPC::STB:
5955     case PPC::STB8:
5956     case PPC::STD:
5957     case PPC::STFD:
5958     case PPC::STFS:
5959     case PPC::STH:
5960     case PPC::STH8:
5961     case PPC::STW:
5962     case PPC::STW8:
5963       FirstOp = 1;
5964       break;
5965     }
5966 
5967     // If this is a load or store with a zero offset, or within the alignment,
5968     // we may be able to fold an add-immediate into the memory operation.
5969     // The check against alignment is below, as it can't occur until we check
5970     // the arguments to N
5971     if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
5972       continue;
5973 
5974     SDValue Base = N->getOperand(FirstOp + 1);
5975     if (!Base.isMachineOpcode())
5976       continue;
5977 
5978     unsigned Flags = 0;
5979     bool ReplaceFlags = true;
5980 
5981     // When the feeding operation is an add-immediate of some sort,
5982     // determine whether we need to add relocation information to the
5983     // target flags on the immediate operand when we fold it into the
5984     // load instruction.
5985     //
5986     // For something like ADDItocL, the relocation information is
5987     // inferred from the opcode; when we process it in the AsmPrinter,
5988     // we add the necessary relocation there.  A load, though, can receive
5989     // relocation from various flavors of ADDIxxx, so we need to carry
5990     // the relocation information in the target flags.
5991     switch (Base.getMachineOpcode()) {
5992     default: continue;
5993 
5994     case PPC::ADDI8:
5995     case PPC::ADDI:
5996       // In some cases (such as TLS) the relocation information
5997       // is already in place on the operand, so copying the operand
5998       // is sufficient.
5999       ReplaceFlags = false;
6000       // For these cases, the immediate may not be divisible by 4, in
6001       // which case the fold is illegal for DS-form instructions.  (The
6002       // other cases provide aligned addresses and are always safe.)
6003       if ((StorageOpcode == PPC::LWA ||
6004            StorageOpcode == PPC::LD  ||
6005            StorageOpcode == PPC::STD) &&
6006           (!isa<ConstantSDNode>(Base.getOperand(1)) ||
6007            Base.getConstantOperandVal(1) % 4 != 0))
6008         continue;
6009       break;
6010     case PPC::ADDIdtprelL:
6011       Flags = PPCII::MO_DTPREL_LO;
6012       break;
6013     case PPC::ADDItlsldL:
6014       Flags = PPCII::MO_TLSLD_LO;
6015       break;
6016     case PPC::ADDItocL:
6017       Flags = PPCII::MO_TOC_LO;
6018       break;
6019     }
6020 
6021     SDValue ImmOpnd = Base.getOperand(1);
6022 
6023     // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
6024     // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
6025     // we might have needed different @ha relocation values for the offset
6026     // pointers).
6027     int MaxDisplacement = 7;
6028     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6029       const GlobalValue *GV = GA->getGlobal();
6030       MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
6031     }
6032 
6033     bool UpdateHBase = false;
6034     SDValue HBase = Base.getOperand(0);
6035 
6036     int Offset = N->getConstantOperandVal(FirstOp);
6037     if (ReplaceFlags) {
6038       if (Offset < 0 || Offset > MaxDisplacement) {
6039         // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
6040         // one use, then we can do this for any offset, we just need to also
6041         // update the offset (i.e. the symbol addend) on the addis also.
6042         if (Base.getMachineOpcode() != PPC::ADDItocL)
6043           continue;
6044 
6045         if (!HBase.isMachineOpcode() ||
6046             HBase.getMachineOpcode() != PPC::ADDIStocHA)
6047           continue;
6048 
6049         if (!Base.hasOneUse() || !HBase.hasOneUse())
6050           continue;
6051 
6052         SDValue HImmOpnd = HBase.getOperand(1);
6053         if (HImmOpnd != ImmOpnd)
6054           continue;
6055 
6056         UpdateHBase = true;
6057       }
6058     } else {
6059       // If we're directly folding the addend from an addi instruction, then:
6060       //  1. In general, the offset on the memory access must be zero.
6061       //  2. If the addend is a constant, then it can be combined with a
6062       //     non-zero offset, but only if the result meets the encoding
6063       //     requirements.
6064       if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
6065         Offset += C->getSExtValue();
6066 
6067         if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
6068              StorageOpcode == PPC::STD) && (Offset % 4) != 0)
6069           continue;
6070 
6071         if (!isInt<16>(Offset))
6072           continue;
6073 
6074         ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
6075                                             ImmOpnd.getValueType());
6076       } else if (Offset != 0) {
6077         continue;
6078       }
6079     }
6080 
6081     // We found an opportunity.  Reverse the operands from the add
6082     // immediate and substitute them into the load or store.  If
6083     // needed, update the target flags for the immediate operand to
6084     // reflect the necessary relocation information.
6085     DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
6086     DEBUG(Base->dump(CurDAG));
6087     DEBUG(dbgs() << "\nN: ");
6088     DEBUG(N->dump(CurDAG));
6089     DEBUG(dbgs() << "\n");
6090 
6091     // If the relocation information isn't already present on the
6092     // immediate operand, add it now.
6093     if (ReplaceFlags) {
6094       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
6095         SDLoc dl(GA);
6096         const GlobalValue *GV = GA->getGlobal();
6097         // We can't perform this optimization for data whose alignment
6098         // is insufficient for the instruction encoding.
6099         if (GV->getAlignment() < 4 &&
6100             (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
6101              StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
6102           DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
6103           continue;
6104         }
6105         ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
6106       } else if (ConstantPoolSDNode *CP =
6107                  dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
6108         const Constant *C = CP->getConstVal();
6109         ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
6110                                                 CP->getAlignment(),
6111                                                 Offset, Flags);
6112       }
6113     }
6114 
6115     if (FirstOp == 1) // Store
6116       (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
6117                                        Base.getOperand(0), N->getOperand(3));
6118     else // Load
6119       (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
6120                                        N->getOperand(2));
6121 
6122     if (UpdateHBase)
6123       (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
6124                                        ImmOpnd);
6125 
6126     // The add-immediate may now be dead, in which case remove it.
6127     if (Base.getNode()->use_empty())
6128       CurDAG->RemoveDeadNode(Base.getNode());
6129   }
6130 }
6131 
6132 /// createPPCISelDag - This pass converts a legalized DAG into a
6133 /// PowerPC-specific DAG, ready for instruction scheduling.
6134 ///
6135 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
6136                                      CodeGenOpt::Level OptLevel) {
6137   return new PPCDAGToDAGISel(TM, OptLevel);
6138 }
6139