1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a pattern matching instruction selector for PowerPC,
10 // converting from a legalized dag to a PPC dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "MCTargetDesc/PPCMCTargetDesc.h"
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCISelLowering.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/Analysis/BranchProbabilityInfo.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineFunction.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/SelectionDAG.h"
35 #include "llvm/CodeGen/SelectionDAGISel.h"
36 #include "llvm/CodeGen/SelectionDAGNodes.h"
37 #include "llvm/CodeGen/TargetInstrInfo.h"
38 #include "llvm/CodeGen/TargetRegisterInfo.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/Support/Casting.h"
48 #include "llvm/Support/CodeGen.h"
49 #include "llvm/Support/CommandLine.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/ErrorHandling.h"
53 #include "llvm/Support/KnownBits.h"
54 #include "llvm/Support/MachineValueType.h"
55 #include "llvm/Support/MathExtras.h"
56 #include "llvm/Support/raw_ostream.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <cstdint>
60 #include <iterator>
61 #include <limits>
62 #include <memory>
63 #include <new>
64 #include <tuple>
65 #include <utility>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "ppc-codegen"
70 
71 STATISTIC(NumSextSetcc,
72           "Number of (sext(setcc)) nodes expanded into GPR sequence.");
73 STATISTIC(NumZextSetcc,
74           "Number of (zext(setcc)) nodes expanded into GPR sequence.");
75 STATISTIC(SignExtensionsAdded,
76           "Number of sign extensions for compare inputs added.");
77 STATISTIC(ZeroExtensionsAdded,
78           "Number of zero extensions for compare inputs added.");
79 STATISTIC(NumLogicOpsOnComparison,
80           "Number of logical ops on i1 values calculated in GPR.");
81 STATISTIC(OmittedForNonExtendUses,
82           "Number of compares not eliminated as they have non-extending uses.");
83 STATISTIC(NumP9Setb,
84           "Number of compares lowered to setb.");
85 
86 // FIXME: Remove this once the bug has been fixed!
87 cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
88 cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
89 
90 static cl::opt<bool>
91     UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
92                        cl::desc("use aggressive ppc isel for bit permutations"),
93                        cl::Hidden);
94 static cl::opt<bool> BPermRewriterNoMasking(
95     "ppc-bit-perm-rewriter-stress-rotates",
96     cl::desc("stress rotate selection in aggressive ppc isel for "
97              "bit permutations"),
98     cl::Hidden);
99 
100 static cl::opt<bool> EnableBranchHint(
101   "ppc-use-branch-hint", cl::init(true),
102     cl::desc("Enable static hinting of branches on ppc"),
103     cl::Hidden);
104 
105 static cl::opt<bool> EnableTLSOpt(
106   "ppc-tls-opt", cl::init(true),
107     cl::desc("Enable tls optimization peephole"),
108     cl::Hidden);
109 
110 enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
111   ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
112   ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };
113 
114 static cl::opt<ICmpInGPRType> CmpInGPR(
115   "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
116   cl::desc("Specify the types of comparisons to emit GPR-only code for."),
117   cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
118              clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
119              clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
120              clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
121              clEnumValN(ICGPR_NonExtIn, "nonextin",
122                         "Only comparisons where inputs don't need [sz]ext."),
123              clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
124              clEnumValN(ICGPR_ZextI32, "zexti32",
125                         "Only i32 comparisons with zext result."),
126              clEnumValN(ICGPR_ZextI64, "zexti64",
127                         "Only i64 comparisons with zext result."),
128              clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
129              clEnumValN(ICGPR_SextI32, "sexti32",
130                         "Only i32 comparisons with sext result."),
131              clEnumValN(ICGPR_SextI64, "sexti64",
132                         "Only i64 comparisons with sext result.")));
133 namespace {
134 
135   //===--------------------------------------------------------------------===//
136   /// PPCDAGToDAGISel - PPC specific code to select PPC machine
137   /// instructions for SelectionDAG operations.
138   ///
139   class PPCDAGToDAGISel : public SelectionDAGISel {
140     const PPCTargetMachine &TM;
141     const PPCSubtarget *PPCSubTarget;
142     const PPCTargetLowering *PPCLowering;
143     unsigned GlobalBaseReg;
144 
145   public:
146     explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
147         : SelectionDAGISel(tm, OptLevel), TM(tm) {}
148 
149     bool runOnMachineFunction(MachineFunction &MF) override {
150       // Make sure we re-emit a set of the global base reg if necessary
151       GlobalBaseReg = 0;
152       PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
153       PPCLowering = PPCSubTarget->getTargetLowering();
154       SelectionDAGISel::runOnMachineFunction(MF);
155 
156       if (!PPCSubTarget->isSVR4ABI())
157         InsertVRSaveCode(MF);
158 
159       return true;
160     }
161 
162     void PreprocessISelDAG() override;
163     void PostprocessISelDAG() override;
164 
165     /// getI16Imm - Return a target constant with the specified value, of type
166     /// i16.
167     inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
168       return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
169     }
170 
171     /// getI32Imm - Return a target constant with the specified value, of type
172     /// i32.
173     inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
174       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
175     }
176 
177     /// getI64Imm - Return a target constant with the specified value, of type
178     /// i64.
179     inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
180       return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
181     }
182 
183     /// getSmallIPtrImm - Return a target constant of pointer type.
184     inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
185       return CurDAG->getTargetConstant(
186           Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
187     }
188 
189     /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
190     /// rotate and mask opcode and mask operation.
191     static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
192                                 unsigned &SH, unsigned &MB, unsigned &ME);
193 
194     /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
195     /// base register.  Return the virtual register that holds this value.
196     SDNode *getGlobalBaseReg();
197 
198     void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);
199 
200     // Select - Convert the specified operand from a target-independent to a
201     // target-specific node if it hasn't already been changed.
202     void Select(SDNode *N) override;
203 
204     bool tryBitfieldInsert(SDNode *N);
205     bool tryBitPermutation(SDNode *N);
206     bool tryIntCompareInGPR(SDNode *N);
207 
208     // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
209     // an X-Form load instruction with the offset being a relocation coming from
210     // the PPCISD::ADD_TLS.
211     bool tryTLSXFormLoad(LoadSDNode *N);
212     // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
213     // an X-Form store instruction with the offset being a relocation coming from
214     // the PPCISD::ADD_TLS.
215     bool tryTLSXFormStore(StoreSDNode *N);
216     /// SelectCC - Select a comparison of the specified values with the
217     /// specified condition code, returning the CR# of the expression.
218     SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
219                      const SDLoc &dl);
220 
221     /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
222     /// immediate field.  Note that the operand at this point is already the
223     /// result of a prior SelectAddressRegImm call.
224     bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
225       if (N.getOpcode() == ISD::TargetConstant ||
226           N.getOpcode() == ISD::TargetGlobalAddress) {
227         Out = N;
228         return true;
229       }
230 
231       return false;
232     }
233 
234     /// SelectAddrIdx - Given the specified address, check to see if it can be
235     /// represented as an indexed [r+r] operation.
236     /// This is for xform instructions whose associated displacement form is D.
237     /// The last parameter \p 0 means associated D form has no requirment for 16
238     /// bit signed displacement.
239     /// Returns false if it can be represented by [r+imm], which are preferred.
240     bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
241       return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
242     }
243 
244     /// SelectAddrIdx4 - Given the specified address, check to see if it can be
245     /// represented as an indexed [r+r] operation.
246     /// This is for xform instructions whose associated displacement form is DS.
247     /// The last parameter \p 4 means associated DS form 16 bit signed
248     /// displacement must be a multiple of 4.
249     /// Returns false if it can be represented by [r+imm], which are preferred.
250     bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
251       return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
252     }
253 
254     /// SelectAddrIdx16 - Given the specified address, check to see if it can be
255     /// represented as an indexed [r+r] operation.
256     /// This is for xform instructions whose associated displacement form is DQ.
257     /// The last parameter \p 16 means associated DQ form 16 bit signed
258     /// displacement must be a multiple of 16.
259     /// Returns false if it can be represented by [r+imm], which are preferred.
260     bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
261       return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
262     }
263 
264     /// SelectAddrIdxOnly - Given the specified address, force it to be
265     /// represented as an indexed [r+r] operation.
266     bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
267       return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
268     }
269 
270     /// SelectAddrImm - Returns true if the address N can be represented by
271     /// a base register plus a signed 16-bit displacement [r+imm].
272     /// The last parameter \p 0 means D form has no requirment for 16 bit signed
273     /// displacement.
274     bool SelectAddrImm(SDValue N, SDValue &Disp,
275                        SDValue &Base) {
276       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
277     }
278 
279     /// SelectAddrImmX4 - Returns true if the address N can be represented by
280     /// a base register plus a signed 16-bit displacement that is a multiple of
281     /// 4 (last parameter). Suitable for use by STD and friends.
282     bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
283       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
284     }
285 
286     /// SelectAddrImmX16 - Returns true if the address N can be represented by
287     /// a base register plus a signed 16-bit displacement that is a multiple of
288     /// 16(last parameter). Suitable for use by STXV and friends.
289     bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
290       return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
291     }
292 
293     // Select an address into a single register.
294     bool SelectAddr(SDValue N, SDValue &Base) {
295       Base = N;
296       return true;
297     }
298 
299     /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
300     /// inline asm expressions.  It is always correct to compute the value into
301     /// a register.  The case of adding a (possibly relocatable) constant to a
302     /// register can be improved, but it is wrong to substitute Reg+Reg for
303     /// Reg in an asm, because the load or store opcode would have to change.
304     bool SelectInlineAsmMemoryOperand(const SDValue &Op,
305                                       unsigned ConstraintID,
306                                       std::vector<SDValue> &OutOps) override {
307       switch(ConstraintID) {
308       default:
309         errs() << "ConstraintID: " << ConstraintID << "\n";
310         llvm_unreachable("Unexpected asm memory constraint");
311       case InlineAsm::Constraint_es:
312       case InlineAsm::Constraint_i:
313       case InlineAsm::Constraint_m:
314       case InlineAsm::Constraint_o:
315       case InlineAsm::Constraint_Q:
316       case InlineAsm::Constraint_Z:
317       case InlineAsm::Constraint_Zy:
318         // We need to make sure that this one operand does not end up in r0
319         // (because we might end up lowering this as 0(%op)).
320         const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
321         const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
322         SDLoc dl(Op);
323         SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
324         SDValue NewOp =
325           SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
326                                          dl, Op.getValueType(),
327                                          Op, RC), 0);
328 
329         OutOps.push_back(NewOp);
330         return false;
331       }
332       return true;
333     }
334 
335     void InsertVRSaveCode(MachineFunction &MF);
336 
337     StringRef getPassName() const override {
338       return "PowerPC DAG->DAG Pattern Instruction Selection";
339     }
340 
341 // Include the pieces autogenerated from the target description.
342 #include "PPCGenDAGISel.inc"
343 
344 private:
345     bool trySETCC(SDNode *N);
346 
347     void PeepholePPC64();
348     void PeepholePPC64ZExt();
349     void PeepholeCROps();
350 
351     SDValue combineToCMPB(SDNode *N);
352     void foldBoolExts(SDValue &Res, SDNode *&N);
353 
354     bool AllUsersSelectZero(SDNode *N);
355     void SwapAllSelectUsers(SDNode *N);
356 
357     bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
358     void transferMemOperands(SDNode *N, SDNode *Result);
359   };
360 
361 } // end anonymous namespace
362 
363 /// InsertVRSaveCode - Once the entire function has been instruction selected,
364 /// all virtual registers are created and all machine instructions are built,
365 /// check to see if we need to save/restore VRSAVE.  If so, do it.
366 void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
367   // Check to see if this function uses vector registers, which means we have to
368   // save and restore the VRSAVE register and update it with the regs we use.
369   //
370   // In this case, there will be virtual registers of vector type created
371   // by the scheduler.  Detect them now.
372   bool HasVectorVReg = false;
373   for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
374     unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
375     if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
376       HasVectorVReg = true;
377       break;
378     }
379   }
380   if (!HasVectorVReg) return;  // nothing to do.
381 
382   // If we have a vector register, we want to emit code into the entry and exit
383   // blocks to save and restore the VRSAVE register.  We do this here (instead
384   // of marking all vector instructions as clobbering VRSAVE) for two reasons:
385   //
386   // 1. This (trivially) reduces the load on the register allocator, by not
387   //    having to represent the live range of the VRSAVE register.
388   // 2. This (more significantly) allows us to create a temporary virtual
389   //    register to hold the saved VRSAVE value, allowing this temporary to be
390   //    register allocated, instead of forcing it to be spilled to the stack.
391 
392   // Create two vregs - one to hold the VRSAVE register that is live-in to the
393   // function and one for the value after having bits or'd into it.
394   unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
395   unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
396 
397   const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
398   MachineBasicBlock &EntryBB = *Fn.begin();
399   DebugLoc dl;
400   // Emit the following code into the entry block:
401   // InVRSAVE = MFVRSAVE
402   // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
403   // MTVRSAVE UpdatedVRSAVE
404   MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
405   BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
406   BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
407           UpdatedVRSAVE).addReg(InVRSAVE);
408   BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
409 
410   // Find all return blocks, outputting a restore in each epilog.
411   for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
412     if (BB->isReturnBlock()) {
413       IP = BB->end(); --IP;
414 
415       // Skip over all terminator instructions, which are part of the return
416       // sequence.
417       MachineBasicBlock::iterator I2 = IP;
418       while (I2 != BB->begin() && (--I2)->isTerminator())
419         IP = I2;
420 
421       // Emit: MTVRSAVE InVRSave
422       BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
423     }
424   }
425 }
426 
427 /// getGlobalBaseReg - Output the instructions required to put the
428 /// base address to use for accessing globals into a register.
429 ///
430 SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
431   if (!GlobalBaseReg) {
432     const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
433     // Insert the set of GlobalBaseReg into the first MBB of the function
434     MachineBasicBlock &FirstMBB = MF->front();
435     MachineBasicBlock::iterator MBBI = FirstMBB.begin();
436     const Module *M = MF->getFunction().getParent();
437     DebugLoc dl;
438 
439     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
440       if (PPCSubTarget->isTargetELF()) {
441         GlobalBaseReg = PPC::R30;
442         if (M->getPICLevel() == PICLevel::SmallPIC) {
443           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
444           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
445           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
446         } else {
447           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
448           BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
449           unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
450           BuildMI(FirstMBB, MBBI, dl,
451                   TII.get(PPC::UpdateGBR), GlobalBaseReg)
452                   .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
453           MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
454         }
455       } else {
456         GlobalBaseReg =
457           RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
458         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
459         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
460       }
461     } else {
462       // We must ensure that this sequence is dominated by the prologue.
463       // FIXME: This is a bit of a big hammer since we don't get the benefits
464       // of shrink-wrapping whenever we emit this instruction. Considering
465       // this is used in any function where we emit a jump table, this may be
466       // a significant limitation. We should consider inserting this in the
467       // block where it is used and then commoning this sequence up if it
468       // appears in multiple places.
469       // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
470       // MovePCtoLR8.
471       MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
472       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
473       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
474       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
475     }
476   }
477   return CurDAG->getRegister(GlobalBaseReg,
478                              PPCLowering->getPointerTy(CurDAG->getDataLayout()))
479       .getNode();
480 }
481 
482 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
483 /// operand. If so Imm will receive the 32-bit value.
484 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
485   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
486     Imm = cast<ConstantSDNode>(N)->getZExtValue();
487     return true;
488   }
489   return false;
490 }
491 
492 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
493 /// operand.  If so Imm will receive the 64-bit value.
494 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
495   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
496     Imm = cast<ConstantSDNode>(N)->getZExtValue();
497     return true;
498   }
499   return false;
500 }
501 
502 // isInt32Immediate - This method tests to see if a constant operand.
503 // If so Imm will receive the 32 bit value.
504 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
505   return isInt32Immediate(N.getNode(), Imm);
506 }
507 
508 /// isInt64Immediate - This method tests to see if the value is a 64-bit
509 /// constant operand. If so Imm will receive the 64-bit value.
510 static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
511   return isInt64Immediate(N.getNode(), Imm);
512 }
513 
514 static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
515                               const SDValue &DestMBB) {
516   assert(isa<BasicBlockSDNode>(DestMBB));
517 
518   if (!FuncInfo->BPI) return PPC::BR_NO_HINT;
519 
520   const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
521   const Instruction *BBTerm = BB->getTerminator();
522 
523   if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
524 
525   const BasicBlock *TBB = BBTerm->getSuccessor(0);
526   const BasicBlock *FBB = BBTerm->getSuccessor(1);
527 
528   auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
529   auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);
530 
531   // We only want to handle cases which are easy to predict at static time, e.g.
532   // C++ throw statement, that is very likely not taken, or calling never
533   // returned function, e.g. stdlib exit(). So we set Threshold to filter
534   // unwanted cases.
535   //
536   // Below is LLVM branch weight table, we only want to handle case 1, 2
537   //
538   // Case                  Taken:Nontaken  Example
539   // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
540   // 2. Invoke-terminating 1:1048575
541   // 3. Coldblock          4:64            __builtin_expect
542   // 4. Loop Branch        124:4           For loop
543   // 5. PH/ZH/FPH          20:12
544   const uint32_t Threshold = 10000;
545 
546   if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
547     return PPC::BR_NO_HINT;
548 
549   LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName()
550                     << "::" << BB->getName() << "'\n"
551                     << " -> " << TBB->getName() << ": " << TProb << "\n"
552                     << " -> " << FBB->getName() << ": " << FProb << "\n");
553 
554   const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
555 
556   // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
557   // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
558   if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
559     std::swap(TProb, FProb);
560 
561   return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
562 }
563 
564 // isOpcWithIntImmediate - This method tests to see if the node is a specific
565 // opcode and that it has a immediate integer right operand.
566 // If so Imm will receive the 32 bit value.
567 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
568   return N->getOpcode() == Opc
569          && isInt32Immediate(N->getOperand(1).getNode(), Imm);
570 }
571 
572 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
573   SDLoc dl(SN);
574   int FI = cast<FrameIndexSDNode>(N)->getIndex();
575   SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
576   unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
577   if (SN->hasOneUse())
578     CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
579                          getSmallIPtrImm(Offset, dl));
580   else
581     ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
582                                            getSmallIPtrImm(Offset, dl)));
583 }
584 
585 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
586                                       bool isShiftMask, unsigned &SH,
587                                       unsigned &MB, unsigned &ME) {
588   // Don't even go down this path for i64, since different logic will be
589   // necessary for rldicl/rldicr/rldimi.
590   if (N->getValueType(0) != MVT::i32)
591     return false;
592 
593   unsigned Shift  = 32;
594   unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
595   unsigned Opcode = N->getOpcode();
596   if (N->getNumOperands() != 2 ||
597       !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
598     return false;
599 
600   if (Opcode == ISD::SHL) {
601     // apply shift left to mask if it comes first
602     if (isShiftMask) Mask = Mask << Shift;
603     // determine which bits are made indeterminant by shift
604     Indeterminant = ~(0xFFFFFFFFu << Shift);
605   } else if (Opcode == ISD::SRL) {
606     // apply shift right to mask if it comes first
607     if (isShiftMask) Mask = Mask >> Shift;
608     // determine which bits are made indeterminant by shift
609     Indeterminant = ~(0xFFFFFFFFu >> Shift);
610     // adjust for the left rotate
611     Shift = 32 - Shift;
612   } else if (Opcode == ISD::ROTL) {
613     Indeterminant = 0;
614   } else {
615     return false;
616   }
617 
618   // if the mask doesn't intersect any Indeterminant bits
619   if (Mask && !(Mask & Indeterminant)) {
620     SH = Shift & 31;
621     // make sure the mask is still a mask (wrap arounds may not be)
622     return isRunOfOnes(Mask, MB, ME);
623   }
624   return false;
625 }
626 
627 bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
628   SDValue Base = ST->getBasePtr();
629   if (Base.getOpcode() != PPCISD::ADD_TLS)
630     return false;
631   SDValue Offset = ST->getOffset();
632   if (!Offset.isUndef())
633     return false;
634 
635   SDLoc dl(ST);
636   EVT MemVT = ST->getMemoryVT();
637   EVT RegVT = ST->getValue().getValueType();
638 
639   unsigned Opcode;
640   switch (MemVT.getSimpleVT().SimpleTy) {
641     default:
642       return false;
643     case MVT::i8: {
644       Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
645       break;
646     }
647     case MVT::i16: {
648       Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
649       break;
650     }
651     case MVT::i32: {
652       Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
653       break;
654     }
655     case MVT::i64: {
656       Opcode = PPC::STDXTLS;
657       break;
658     }
659   }
660   SDValue Chain = ST->getChain();
661   SDVTList VTs = ST->getVTList();
662   SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
663                    Chain};
664   SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
665   transferMemOperands(ST, MN);
666   ReplaceNode(ST, MN);
667   return true;
668 }
669 
670 bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
671   SDValue Base = LD->getBasePtr();
672   if (Base.getOpcode() != PPCISD::ADD_TLS)
673     return false;
674   SDValue Offset = LD->getOffset();
675   if (!Offset.isUndef())
676     return false;
677 
678   SDLoc dl(LD);
679   EVT MemVT = LD->getMemoryVT();
680   EVT RegVT = LD->getValueType(0);
681   unsigned Opcode;
682   switch (MemVT.getSimpleVT().SimpleTy) {
683     default:
684       return false;
685     case MVT::i8: {
686       Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
687       break;
688     }
689     case MVT::i16: {
690       Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
691       break;
692     }
693     case MVT::i32: {
694       Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
695       break;
696     }
697     case MVT::i64: {
698       Opcode = PPC::LDXTLS;
699       break;
700     }
701   }
702   SDValue Chain = LD->getChain();
703   SDVTList VTs = LD->getVTList();
704   SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
705   SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
706   transferMemOperands(LD, MN);
707   ReplaceNode(LD, MN);
708   return true;
709 }
710 
711 /// Turn an or of two masked values into the rotate left word immediate then
712 /// mask insert (rlwimi) instruction.
713 bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
714   SDValue Op0 = N->getOperand(0);
715   SDValue Op1 = N->getOperand(1);
716   SDLoc dl(N);
717 
718   KnownBits LKnown = CurDAG->computeKnownBits(Op0);
719   KnownBits RKnown = CurDAG->computeKnownBits(Op1);
720 
721   unsigned TargetMask = LKnown.Zero.getZExtValue();
722   unsigned InsertMask = RKnown.Zero.getZExtValue();
723 
724   if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
725     unsigned Op0Opc = Op0.getOpcode();
726     unsigned Op1Opc = Op1.getOpcode();
727     unsigned Value, SH = 0;
728     TargetMask = ~TargetMask;
729     InsertMask = ~InsertMask;
730 
731     // If the LHS has a foldable shift and the RHS does not, then swap it to the
732     // RHS so that we can fold the shift into the insert.
733     if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
734       if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
735           Op0.getOperand(0).getOpcode() == ISD::SRL) {
736         if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
737             Op1.getOperand(0).getOpcode() != ISD::SRL) {
738           std::swap(Op0, Op1);
739           std::swap(Op0Opc, Op1Opc);
740           std::swap(TargetMask, InsertMask);
741         }
742       }
743     } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
744       if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
745           Op1.getOperand(0).getOpcode() != ISD::SRL) {
746         std::swap(Op0, Op1);
747         std::swap(Op0Opc, Op1Opc);
748         std::swap(TargetMask, InsertMask);
749       }
750     }
751 
752     unsigned MB, ME;
753     if (isRunOfOnes(InsertMask, MB, ME)) {
754       if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
755           isInt32Immediate(Op1.getOperand(1), Value)) {
756         Op1 = Op1.getOperand(0);
757         SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
758       }
759       if (Op1Opc == ISD::AND) {
760        // The AND mask might not be a constant, and we need to make sure that
761        // if we're going to fold the masking with the insert, all bits not
762        // know to be zero in the mask are known to be one.
763         KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
764         bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
765 
766         unsigned SHOpc = Op1.getOperand(0).getOpcode();
767         if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
768             isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
769           // Note that Value must be in range here (less than 32) because
770           // otherwise there would not be any bits set in InsertMask.
771           Op1 = Op1.getOperand(0).getOperand(0);
772           SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
773         }
774       }
775 
776       SH &= 31;
777       SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
778                           getI32Imm(ME, dl) };
779       ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
780       return true;
781     }
782   }
783   return false;
784 }
785 
786 // Predict the number of instructions that would be generated by calling
787 // selectI64Imm(N).
788 static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
789   // Assume no remaining bits.
790   unsigned Remainder = 0;
791   // Assume no shift required.
792   unsigned Shift = 0;
793 
794   // If it can't be represented as a 32 bit value.
795   if (!isInt<32>(Imm)) {
796     Shift = countTrailingZeros<uint64_t>(Imm);
797     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
798 
799     // If the shifted value fits 32 bits.
800     if (isInt<32>(ImmSh)) {
801       // Go with the shifted value.
802       Imm = ImmSh;
803     } else {
804       // Still stuck with a 64 bit value.
805       Remainder = Imm;
806       Shift = 32;
807       Imm >>= 32;
808     }
809   }
810 
811   // Intermediate operand.
812   unsigned Result = 0;
813 
814   // Handle first 32 bits.
815   unsigned Lo = Imm & 0xFFFF;
816 
817   // Simple value.
818   if (isInt<16>(Imm)) {
819     // Just the Lo bits.
820     ++Result;
821   } else if (Lo) {
822     // Handle the Hi bits and Lo bits.
823     Result += 2;
824   } else {
825     // Just the Hi bits.
826     ++Result;
827   }
828 
829   // If no shift, we're done.
830   if (!Shift) return Result;
831 
832   // If Hi word == Lo word,
833   // we can use rldimi to insert the Lo word into Hi word.
834   if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
835     ++Result;
836     return Result;
837   }
838 
839   // Shift for next step if the upper 32-bits were not zero.
840   if (Imm)
841     ++Result;
842 
843   // Add in the last bits as required.
844   if ((Remainder >> 16) & 0xFFFF)
845     ++Result;
846   if (Remainder & 0xFFFF)
847     ++Result;
848 
849   return Result;
850 }
851 
// Rotate the 64-bit value Imm left by R bit positions.
//
// R is reduced modulo 64, so R == 0 (or any multiple of 64) returns Imm
// unchanged instead of evaluating a shift by 64 bits, which is undefined
// behavior in C++. Results for R in [1, 63] are unchanged.
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  R &= 63;
  if (R == 0)
    return Imm;
  return (Imm << R) | (Imm >> (64 - R));
}
855 
856 static unsigned selectI64ImmInstrCount(int64_t Imm) {
857   unsigned Count = selectI64ImmInstrCountDirect(Imm);
858 
859   // If the instruction count is 1 or 2, we do not need further analysis
860   // since rotate + load constant requires at least 2 instructions.
861   if (Count <= 2)
862     return Count;
863 
864   for (unsigned r = 1; r < 63; ++r) {
865     uint64_t RImm = Rot64(Imm, r);
866     unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
867     Count = std::min(Count, RCount);
868 
869     // See comments in selectI64Imm for an explanation of the logic below.
870     unsigned LS = findLastSet(RImm);
871     if (LS != r-1)
872       continue;
873 
874     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
875     uint64_t RImmWithOnes = RImm | OnesMask;
876 
877     RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
878     Count = std::min(Count, RCount);
879   }
880 
881   return Count;
882 }
883 
884 // Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
885 // (above) needs to be kept in sync with this function.
886 static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
887                                   int64_t Imm) {
888   // Assume no remaining bits.
889   unsigned Remainder = 0;
890   // Assume no shift required.
891   unsigned Shift = 0;
892 
893   // If it can't be represented as a 32 bit value.
894   if (!isInt<32>(Imm)) {
895     Shift = countTrailingZeros<uint64_t>(Imm);
896     int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
897 
898     // If the shifted value fits 32 bits.
899     if (isInt<32>(ImmSh)) {
900       // Go with the shifted value.
901       Imm = ImmSh;
902     } else {
903       // Still stuck with a 64 bit value.
904       Remainder = Imm;
905       Shift = 32;
906       Imm >>= 32;
907     }
908   }
909 
910   // Intermediate operand.
911   SDNode *Result;
912 
913   // Handle first 32 bits.
914   unsigned Lo = Imm & 0xFFFF;
915   unsigned Hi = (Imm >> 16) & 0xFFFF;
916 
917   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
918       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
919   };
920 
921   // Simple value.
922   if (isInt<16>(Imm)) {
923     uint64_t SextImm = SignExtend64(Lo, 16);
924     SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
925     // Just the Lo bits.
926     Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
927   } else if (Lo) {
928     // Handle the Hi bits.
929     unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
930     Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
931     // And Lo bits.
932     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
933                                     SDValue(Result, 0), getI32Imm(Lo));
934   } else {
935     // Just the Hi bits.
936     Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
937   }
938 
939   // If no shift, we're done.
940   if (!Shift) return Result;
941 
942   // If Hi word == Lo word,
943   // we can use rldimi to insert the Lo word into Hi word.
944   if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
945     SDValue Ops[] =
946       { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
947     return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
948   }
949 
950   // Shift for next step if the upper 32-bits were not zero.
951   if (Imm) {
952     Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
953                                     SDValue(Result, 0),
954                                     getI32Imm(Shift),
955                                     getI32Imm(63 - Shift));
956   }
957 
958   // Add in the last bits as required.
959   if ((Hi = (Remainder >> 16) & 0xFFFF)) {
960     Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
961                                     SDValue(Result, 0), getI32Imm(Hi));
962   }
963   if ((Lo = Remainder & 0xFFFF)) {
964     Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
965                                     SDValue(Result, 0), getI32Imm(Lo));
966   }
967 
968   return Result;
969 }
970 
971 static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
972                             int64_t Imm) {
973   unsigned Count = selectI64ImmInstrCountDirect(Imm);
974 
975   // If the instruction count is 1 or 2, we do not need further analysis
976   // since rotate + load constant requires at least 2 instructions.
977   if (Count <= 2)
978     return selectI64ImmDirect(CurDAG, dl, Imm);
979 
980   unsigned RMin = 0;
981 
982   int64_t MatImm;
983   unsigned MaskEnd;
984 
985   for (unsigned r = 1; r < 63; ++r) {
986     uint64_t RImm = Rot64(Imm, r);
987     unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
988     if (RCount < Count) {
989       Count = RCount;
990       RMin = r;
991       MatImm = RImm;
992       MaskEnd = 63;
993     }
994 
995     // If the immediate to generate has many trailing zeros, it might be
996     // worthwhile to generate a rotated value with too many leading ones
997     // (because that's free with li/lis's sign-extension semantics), and then
998     // mask them off after rotation.
999 
1000     unsigned LS = findLastSet(RImm);
1001     // We're adding (63-LS) higher-order ones, and we expect to mask them off
1002     // after performing the inverse rotation by (64-r). So we need that:
1003     //   63-LS == 64-r => LS == r-1
1004     if (LS != r-1)
1005       continue;
1006 
1007     uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
1008     uint64_t RImmWithOnes = RImm | OnesMask;
1009 
1010     RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
1011     if (RCount < Count) {
1012       Count = RCount;
1013       RMin = r;
1014       MatImm = RImmWithOnes;
1015       MaskEnd = LS;
1016     }
1017   }
1018 
1019   if (!RMin)
1020     return selectI64ImmDirect(CurDAG, dl, Imm);
1021 
1022   auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1023       return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1024   };
1025 
1026   SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
1027   return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
1028                                 getI32Imm(64 - RMin), getI32Imm(MaskEnd));
1029 }
1030 
1031 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
1032   unsigned MaxTruncation = 0;
1033   // Cannot use range-based for loop here as we need the actual use (i.e. we
1034   // need the operand number corresponding to the use). A range-based for
1035   // will unbox the use and provide an SDNode*.
1036   for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
1037        Use != UseEnd; ++Use) {
1038     unsigned Opc =
1039       Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
1040     switch (Opc) {
1041     default: return 0;
1042     case ISD::TRUNCATE:
1043       if (Use->isMachineOpcode())
1044         return 0;
1045       MaxTruncation =
1046         std::max(MaxTruncation, Use->getValueType(0).getSizeInBits());
1047       continue;
1048     case ISD::STORE: {
1049       if (Use->isMachineOpcode())
1050         return 0;
1051       StoreSDNode *STN = cast<StoreSDNode>(*Use);
1052       unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
1053       if (MemVTSize == 64 || Use.getOperandNo() != 0)
1054         return 0;
1055       MaxTruncation = std::max(MaxTruncation, MemVTSize);
1056       continue;
1057     }
1058     case PPC::STW8:
1059     case PPC::STWX8:
1060     case PPC::STWU8:
1061     case PPC::STWUX8:
1062       if (Use.getOperandNo() != 0)
1063         return 0;
1064       MaxTruncation = std::max(MaxTruncation, 32u);
1065       continue;
1066     case PPC::STH8:
1067     case PPC::STHX8:
1068     case PPC::STHU8:
1069     case PPC::STHUX8:
1070       if (Use.getOperandNo() != 0)
1071         return 0;
1072       MaxTruncation = std::max(MaxTruncation, 16u);
1073       continue;
1074     case PPC::STB8:
1075     case PPC::STBX8:
1076     case PPC::STBU8:
1077     case PPC::STBUX8:
1078       if (Use.getOperandNo() != 0)
1079         return 0;
1080       MaxTruncation = std::max(MaxTruncation, 8u);
1081       continue;
1082     }
1083   }
1084   return MaxTruncation;
1085 }
1086 
1087 // Select a 64-bit constant.
1088 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
1089   SDLoc dl(N);
1090 
1091   // Get 64 bit value.
1092   int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
1093   if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1094     uint64_t SextImm = SignExtend64(Imm, MinSize);
1095     SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1096     if (isInt<16>(SextImm))
1097       return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1098   }
1099   return selectI64Imm(CurDAG, dl, Imm);
1100 }
1101 
1102 namespace {
1103 
1104 class BitPermutationSelector {
1105   struct ValueBit {
1106     SDValue V;
1107 
1108     // The bit number in the value, using a convention where bit 0 is the
1109     // lowest-order bit.
1110     unsigned Idx;
1111 
1112     // ConstZero means a bit we need to mask off.
1113     // Variable is a bit comes from an input variable.
1114     // VariableKnownToBeZero is also a bit comes from an input variable,
1115     // but it is known to be already zero. So we do not need to mask them.
1116     enum Kind {
1117       ConstZero,
1118       Variable,
1119       VariableKnownToBeZero
1120     } K;
1121 
1122     ValueBit(SDValue V, unsigned I, Kind K = Variable)
1123       : V(V), Idx(I), K(K) {}
1124     ValueBit(Kind K = Variable)
1125       : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}
1126 
1127     bool isZero() const {
1128       return K == ConstZero || K == VariableKnownToBeZero;
1129     }
1130 
1131     bool hasValue() const {
1132       return K == Variable || K == VariableKnownToBeZero;
1133     }
1134 
1135     SDValue getValue() const {
1136       assert(hasValue() && "Cannot get the value of a constant bit");
1137       return V;
1138     }
1139 
1140     unsigned getValueBitIndex() const {
1141       assert(hasValue() && "Cannot get the value bit index of a constant bit");
1142       return Idx;
1143     }
1144   };
1145 
1146   // A bit group has the same underlying value and the same rotate factor.
1147   struct BitGroup {
1148     SDValue V;
1149     unsigned RLAmt;
1150     unsigned StartIdx, EndIdx;
1151 
1152     // This rotation amount assumes that the lower 32 bits of the quantity are
1153     // replicated in the high 32 bits by the rotation operator (which is done
1154     // by rlwinm and friends in 64-bit mode).
1155     bool Repl32;
1156     // Did converting to Repl32 == true change the rotation factor? If it did,
1157     // it decreased it by 32.
1158     bool Repl32CR;
1159     // Was this group coalesced after setting Repl32 to true?
1160     bool Repl32Coalesced;
1161 
1162     BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1163       : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1164         Repl32Coalesced(false) {
1165       LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1166                         << " [" << S << ", " << E << "]\n");
1167     }
1168   };
1169 
1170   // Information on each (Value, RLAmt) pair (like the number of groups
1171   // associated with each) used to choose the lowering method.
1172   struct ValueRotInfo {
1173     SDValue V;
1174     unsigned RLAmt = std::numeric_limits<unsigned>::max();
1175     unsigned NumGroups = 0;
1176     unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1177     bool Repl32 = false;
1178 
1179     ValueRotInfo() = default;
1180 
1181     // For sorting (in reverse order) by NumGroups, and then by
1182     // FirstGroupStartIdx.
1183     bool operator < (const ValueRotInfo &Other) const {
1184       // We need to sort so that the non-Repl32 come first because, when we're
1185       // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1186       // masking operation.
1187       if (Repl32 < Other.Repl32)
1188         return true;
1189       else if (Repl32 > Other.Repl32)
1190         return false;
1191       else if (NumGroups > Other.NumGroups)
1192         return true;
1193       else if (NumGroups < Other.NumGroups)
1194         return false;
1195       else if (RLAmt == 0 && Other.RLAmt != 0)
1196         return true;
1197       else if (RLAmt != 0 && Other.RLAmt == 0)
1198         return false;
1199       else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1200         return true;
1201       return false;
1202     }
1203   };
1204 
1205   using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1206   using ValueBitsMemoizer =
1207       DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1208   ValueBitsMemoizer Memoizer;
1209 
1210   // Return a pair of bool and a SmallVector pointer to a memoization entry.
1211   // The bool is true if something interesting was deduced, otherwise if we're
1212   // providing only a generic representation of V (or something else likewise
1213   // uninteresting for instruction selection) through the SmallVector.
1214   std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1215                                                             unsigned NumBits) {
1216     auto &ValueEntry = Memoizer[V];
1217     if (ValueEntry)
1218       return std::make_pair(ValueEntry->first, &ValueEntry->second);
1219     ValueEntry.reset(new ValueBitsMemoizedValue());
1220     bool &Interesting = ValueEntry->first;
1221     SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1222     Bits.resize(NumBits);
1223 
1224     switch (V.getOpcode()) {
1225     default: break;
1226     case ISD::ROTL:
1227       if (isa<ConstantSDNode>(V.getOperand(1))) {
1228         unsigned RotAmt = V.getConstantOperandVal(1);
1229 
1230         const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1231 
1232         for (unsigned i = 0; i < NumBits; ++i)
1233           Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1234 
1235         return std::make_pair(Interesting = true, &Bits);
1236       }
1237       break;
1238     case ISD::SHL:
1239       if (isa<ConstantSDNode>(V.getOperand(1))) {
1240         unsigned ShiftAmt = V.getConstantOperandVal(1);
1241 
1242         const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1243 
1244         for (unsigned i = ShiftAmt; i < NumBits; ++i)
1245           Bits[i] = LHSBits[i - ShiftAmt];
1246 
1247         for (unsigned i = 0; i < ShiftAmt; ++i)
1248           Bits[i] = ValueBit(ValueBit::ConstZero);
1249 
1250         return std::make_pair(Interesting = true, &Bits);
1251       }
1252       break;
1253     case ISD::SRL:
1254       if (isa<ConstantSDNode>(V.getOperand(1))) {
1255         unsigned ShiftAmt = V.getConstantOperandVal(1);
1256 
1257         const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1258 
1259         for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1260           Bits[i] = LHSBits[i + ShiftAmt];
1261 
1262         for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1263           Bits[i] = ValueBit(ValueBit::ConstZero);
1264 
1265         return std::make_pair(Interesting = true, &Bits);
1266       }
1267       break;
1268     case ISD::AND:
1269       if (isa<ConstantSDNode>(V.getOperand(1))) {
1270         uint64_t Mask = V.getConstantOperandVal(1);
1271 
1272         const SmallVector<ValueBit, 64> *LHSBits;
1273         // Mark this as interesting, only if the LHS was also interesting. This
1274         // prevents the overall procedure from matching a single immediate 'and'
1275         // (which is non-optimal because such an and might be folded with other
1276         // things if we don't select it here).
1277         std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1278 
1279         for (unsigned i = 0; i < NumBits; ++i)
1280           if (((Mask >> i) & 1) == 1)
1281             Bits[i] = (*LHSBits)[i];
1282           else {
1283             // AND instruction masks this bit. If the input is already zero,
1284             // we have nothing to do here. Otherwise, make the bit ConstZero.
1285             if ((*LHSBits)[i].isZero())
1286               Bits[i] = (*LHSBits)[i];
1287             else
1288               Bits[i] = ValueBit(ValueBit::ConstZero);
1289           }
1290 
1291         return std::make_pair(Interesting, &Bits);
1292       }
1293       break;
1294     case ISD::OR: {
1295       const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1296       const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1297 
1298       bool AllDisjoint = true;
1299       SDValue LastVal = SDValue();
1300       unsigned LastIdx = 0;
1301       for (unsigned i = 0; i < NumBits; ++i) {
1302         if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1303           // If both inputs are known to be zero and one is ConstZero and
1304           // another is VariableKnownToBeZero, we can select whichever
1305           // we like. To minimize the number of bit groups, we select
1306           // VariableKnownToBeZero if this bit is the next bit of the same
1307           // input variable from the previous bit. Otherwise, we select
1308           // ConstZero.
1309           if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1310               LHSBits[i].getValueBitIndex() == LastIdx + 1)
1311             Bits[i] = LHSBits[i];
1312           else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1313                    RHSBits[i].getValueBitIndex() == LastIdx + 1)
1314             Bits[i] = RHSBits[i];
1315           else
1316             Bits[i] = ValueBit(ValueBit::ConstZero);
1317         }
1318         else if (LHSBits[i].isZero())
1319           Bits[i] = RHSBits[i];
1320         else if (RHSBits[i].isZero())
1321           Bits[i] = LHSBits[i];
1322         else {
1323           AllDisjoint = false;
1324           break;
1325         }
1326         // We remember the value and bit index of this bit.
1327         if (Bits[i].hasValue()) {
1328           LastVal = Bits[i].getValue();
1329           LastIdx = Bits[i].getValueBitIndex();
1330         }
1331         else {
1332           if (LastVal) LastVal = SDValue();
1333           LastIdx = 0;
1334         }
1335       }
1336 
1337       if (!AllDisjoint)
1338         break;
1339 
1340       return std::make_pair(Interesting = true, &Bits);
1341     }
1342     case ISD::ZERO_EXTEND: {
1343       // We support only the case with zero extension from i32 to i64 so far.
1344       if (V.getValueType() != MVT::i64 ||
1345           V.getOperand(0).getValueType() != MVT::i32)
1346         break;
1347 
1348       const SmallVector<ValueBit, 64> *LHSBits;
1349       const unsigned NumOperandBits = 32;
1350       std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1351                                                     NumOperandBits);
1352 
1353       for (unsigned i = 0; i < NumOperandBits; ++i)
1354         Bits[i] = (*LHSBits)[i];
1355 
1356       for (unsigned i = NumOperandBits; i < NumBits; ++i)
1357         Bits[i] = ValueBit(ValueBit::ConstZero);
1358 
1359       return std::make_pair(Interesting, &Bits);
1360     }
1361     case ISD::TRUNCATE: {
1362       EVT FromType = V.getOperand(0).getValueType();
1363       EVT ToType = V.getValueType();
1364       // We support only the case with truncate from i64 to i32.
1365       if (FromType != MVT::i64 || ToType != MVT::i32)
1366         break;
1367       const unsigned NumAllBits = FromType.getSizeInBits();
1368       SmallVector<ValueBit, 64> *InBits;
1369       std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1370                                                     NumAllBits);
1371       const unsigned NumValidBits = ToType.getSizeInBits();
1372 
1373       // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1374       // So, we cannot include this truncate.
1375       bool UseUpper32bit = false;
1376       for (unsigned i = 0; i < NumValidBits; ++i)
1377         if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1378           UseUpper32bit = true;
1379           break;
1380         }
1381       if (UseUpper32bit)
1382         break;
1383 
1384       for (unsigned i = 0; i < NumValidBits; ++i)
1385         Bits[i] = (*InBits)[i];
1386 
1387       return std::make_pair(Interesting, &Bits);
1388     }
1389     case ISD::AssertZext: {
1390       // For AssertZext, we look through the operand and
1391       // mark the bits known to be zero.
1392       const SmallVector<ValueBit, 64> *LHSBits;
1393       std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1394                                                     NumBits);
1395 
1396       EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1397       const unsigned NumValidBits = FromType.getSizeInBits();
1398       for (unsigned i = 0; i < NumValidBits; ++i)
1399         Bits[i] = (*LHSBits)[i];
1400 
1401       // These bits are known to be zero.
1402       for (unsigned i = NumValidBits; i < NumBits; ++i)
1403         Bits[i] = ValueBit((*LHSBits)[i].getValue(),
1404                            (*LHSBits)[i].getValueBitIndex(),
1405                            ValueBit::VariableKnownToBeZero);
1406 
1407       return std::make_pair(Interesting, &Bits);
1408     }
1409     case ISD::LOAD:
1410       LoadSDNode *LD = cast<LoadSDNode>(V);
1411       if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1412         EVT VT = LD->getMemoryVT();
1413         const unsigned NumValidBits = VT.getSizeInBits();
1414 
1415         for (unsigned i = 0; i < NumValidBits; ++i)
1416           Bits[i] = ValueBit(V, i);
1417 
1418         // These bits are known to be zero.
1419         for (unsigned i = NumValidBits; i < NumBits; ++i)
1420           Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1421 
1422         // Zero-extending load itself cannot be optimized. So, it is not
1423         // interesting by itself though it gives useful information.
1424         return std::make_pair(Interesting = false, &Bits);
1425       }
1426       break;
1427     }
1428 
1429     for (unsigned i = 0; i < NumBits; ++i)
1430       Bits[i] = ValueBit(V, i);
1431 
1432     return std::make_pair(Interesting = false, &Bits);
1433   }
1434 
1435   // For each value (except the constant ones), compute the left-rotate amount
1436   // to get it from its original to final position.
1437   void computeRotationAmounts() {
1438     NeedMask = false;
1439     RLAmt.resize(Bits.size());
1440     for (unsigned i = 0; i < Bits.size(); ++i)
1441       if (Bits[i].hasValue()) {
1442         unsigned VBI = Bits[i].getValueBitIndex();
1443         if (i >= VBI)
1444           RLAmt[i] = i - VBI;
1445         else
1446           RLAmt[i] = Bits.size() - (VBI - i);
1447       } else if (Bits[i].isZero()) {
1448         NeedMask = true;
1449         RLAmt[i] = UINT32_MAX;
1450       } else {
1451         llvm_unreachable("Unknown value bit type");
1452       }
1453   }
1454 
1455   // Collect groups of consecutive bits with the same underlying value and
1456   // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1457   // they break up groups.
1458   void collectBitGroups(bool LateMask) {
1459     BitGroups.clear();
1460 
1461     unsigned LastRLAmt = RLAmt[0];
1462     SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1463     unsigned LastGroupStartIdx = 0;
1464     bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1465     for (unsigned i = 1; i < Bits.size(); ++i) {
1466       unsigned ThisRLAmt = RLAmt[i];
1467       SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1468       if (LateMask && !ThisValue) {
1469         ThisValue = LastValue;
1470         ThisRLAmt = LastRLAmt;
1471         // If we're doing late masking, then the first bit group always starts
1472         // at zero (even if the first bits were zero).
1473         if (BitGroups.empty())
1474           LastGroupStartIdx = 0;
1475       }
1476 
1477       // If this bit is known to be zero and the current group is a bit group
1478       // of zeros, we do not need to terminate the current bit group even the
1479       // Value or RLAmt does not match here. Instead, we terminate this group
1480       // when the first non-zero bit appears later.
1481       if (IsGroupOfZeros && Bits[i].isZero())
1482         continue;
1483 
1484       // If this bit has the same underlying value and the same rotate factor as
1485       // the last one, then they're part of the same group.
1486       if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1487         // We cannot continue the current group if this bits is not known to
1488         // be zero in a bit group of zeros.
1489         if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1490           continue;
1491 
1492       if (LastValue.getNode())
1493         BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1494                                      i-1));
1495       LastRLAmt = ThisRLAmt;
1496       LastValue = ThisValue;
1497       LastGroupStartIdx = i;
1498       IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1499     }
1500     if (LastValue.getNode())
1501       BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1502                                    Bits.size()-1));
1503 
1504     if (BitGroups.empty())
1505       return;
1506 
1507     // We might be able to combine the first and last groups.
1508     if (BitGroups.size() > 1) {
1509       // If the first and last groups are the same, then remove the first group
1510       // in favor of the last group, making the ending index of the last group
1511       // equal to the ending index of the to-be-removed first group.
1512       if (BitGroups[0].StartIdx == 0 &&
1513           BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1514           BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1515           BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1516         LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1517         BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1518         BitGroups.erase(BitGroups.begin());
1519       }
1520     }
1521   }
1522 
1523   // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1524   // associated with each. If the number of groups are same, we prefer a group
1525   // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1526   // instruction. If there is a degeneracy, pick the one that occurs
1527   // first (in the final value).
1528   void collectValueRotInfo() {
1529     ValueRots.clear();
1530 
1531     for (auto &BG : BitGroups) {
1532       unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1533       ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1534       VRI.V = BG.V;
1535       VRI.RLAmt = BG.RLAmt;
1536       VRI.Repl32 = BG.Repl32;
1537       VRI.NumGroups += 1;
1538       VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1539     }
1540 
1541     // Now that we've collected the various ValueRotInfo instances, we need to
1542     // sort them.
1543     ValueRotsVec.clear();
1544     for (auto &I : ValueRots) {
1545       ValueRotsVec.push_back(I.second);
1546     }
1547     llvm::sort(ValueRotsVec);
1548   }
1549 
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible. The bit groups are updated in place; nothing is
  // returned.
1558   void assignRepl32BitGroups() {
1559     // If we have bits like this:
1560     //
1561     // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
1562     // V bits: ... 7  6  5  4  3  2  1 0 31 30 29 28 27 26 25 24
1563     // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
1564     //
1565     // But, making use of a 32-bit operation that replicates the low-order 32
1566     // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1567     // of 8.
1568 
1569     auto IsAllLow32 = [this](BitGroup & BG) {
1570       if (BG.StartIdx <= BG.EndIdx) {
1571         for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1572           if (!Bits[i].hasValue())
1573             continue;
1574           if (Bits[i].getValueBitIndex() >= 32)
1575             return false;
1576         }
1577       } else {
1578         for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1579           if (!Bits[i].hasValue())
1580             continue;
1581           if (Bits[i].getValueBitIndex() >= 32)
1582             return false;
1583         }
1584         for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1585           if (!Bits[i].hasValue())
1586             continue;
1587           if (Bits[i].getValueBitIndex() >= 32)
1588             return false;
1589         }
1590       }
1591 
1592       return true;
1593     };
1594 
1595     for (auto &BG : BitGroups) {
1596       // If this bit group has RLAmt of 0 and will not be merged with
1597       // another bit group, we don't benefit from Repl32. We don't mark
1598       // such group to give more freedom for later instruction selection.
1599       if (BG.RLAmt == 0) {
1600         auto PotentiallyMerged = [this](BitGroup & BG) {
1601           for (auto &BG2 : BitGroups)
1602             if (&BG != &BG2 && BG.V == BG2.V &&
1603                 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
1604               return true;
1605           return false;
1606         };
1607         if (!PotentiallyMerged(BG))
1608           continue;
1609       }
1610       if (BG.StartIdx < 32 && BG.EndIdx < 32) {
1611         if (IsAllLow32(BG)) {
1612           if (BG.RLAmt >= 32) {
1613             BG.RLAmt -= 32;
1614             BG.Repl32CR = true;
1615           }
1616 
1617           BG.Repl32 = true;
1618 
1619           LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
1620                             << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
1621                             << BG.StartIdx << ", " << BG.EndIdx << "]\n");
1622         }
1623       }
1624     }
1625 
1626     // Now walk through the bit groups, consolidating where possible.
1627     for (auto I = BitGroups.begin(); I != BitGroups.end();) {
1628       // We might want to remove this bit group by merging it with the previous
1629       // group (which might be the ending group).
1630       auto IP = (I == BitGroups.begin()) ?
1631                 std::prev(BitGroups.end()) : std::prev(I);
1632       if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
1633           I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
1634 
1635         LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
1636                           << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
1637                           << I->StartIdx << ", " << I->EndIdx
1638                           << "] with group with range [" << IP->StartIdx << ", "
1639                           << IP->EndIdx << "]\n");
1640 
1641         IP->EndIdx = I->EndIdx;
1642         IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
1643         IP->Repl32Coalesced = true;
1644         I = BitGroups.erase(I);
1645         continue;
1646       } else {
1647         // There is a special case worth handling: If there is a single group
1648         // covering the entire upper 32 bits, and it can be merged with both
1649         // the next and previous groups (which might be the same group), then
1650         // do so. If it is the same group (so there will be only one group in
1651         // total), then we need to reverse the order of the range so that it
1652         // covers the entire 64 bits.
1653         if (I->StartIdx == 32 && I->EndIdx == 63) {
1654           assert(std::next(I) == BitGroups.end() &&
1655                  "bit group ends at index 63 but there is another?");
1656           auto IN = BitGroups.begin();
1657 
1658           if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
1659               (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
1660               IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
1661               IsAllLow32(*I)) {
1662 
1663             LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
1664                               << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
1665                               << ", " << I->EndIdx
1666                               << "] with 32-bit replicated groups with ranges ["
1667                               << IP->StartIdx << ", " << IP->EndIdx << "] and ["
1668                               << IN->StartIdx << ", " << IN->EndIdx << "]\n");
1669 
1670             if (IP == IN) {
1671               // There is only one other group; change it to cover the whole
1672               // range (backward, so that it can still be Repl32 but cover the
1673               // whole 64-bit range).
1674               IP->StartIdx = 31;
1675               IP->EndIdx = 30;
1676               IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
1677               IP->Repl32Coalesced = true;
1678               I = BitGroups.erase(I);
1679             } else {
1680               // There are two separate groups, one before this group and one
1681               // after us (at the beginning). We're going to remove this group,
1682               // but also the group at the very beginning.
1683               IP->EndIdx = IN->EndIdx;
1684               IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
1685               IP->Repl32Coalesced = true;
1686               I = BitGroups.erase(I);
1687               BitGroups.erase(BitGroups.begin());
1688             }
1689 
1690             // This must be the last group in the vector (and we might have
1691             // just invalidated the iterator above), so break here.
1692             break;
1693           }
1694         }
1695       }
1696 
1697       ++I;
1698     }
1699   }
1700 
1701   SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
1702     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1703   }
1704 
1705   uint64_t getZerosMask() {
1706     uint64_t Mask = 0;
1707     for (unsigned i = 0; i < Bits.size(); ++i) {
1708       if (Bits[i].hasValue())
1709         continue;
1710       Mask |= (UINT64_C(1) << i);
1711     }
1712 
1713     return ~Mask;
1714   }
1715 
1716   // This method extends an input value to 64 bit if input is 32-bit integer.
1717   // While selecting instructions in BitPermutationSelector in 64-bit mode,
1718   // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1719   // In such case, we extend it to 64 bit to be consistent with other values.
1720   SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1721     if (V.getValueSizeInBits() == 64)
1722       return V;
1723 
1724     assert(V.getValueSizeInBits() == 32);
1725     SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1726     SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1727                                                    MVT::i64), 0);
1728     SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1729                                                     MVT::i64, ImDef, V,
1730                                                     SubRegIdx), 0);
1731     return ExtVal;
1732   }
1733 
1734   SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
1735     if (V.getValueSizeInBits() == 32)
1736       return V;
1737 
1738     assert(V.getValueSizeInBits() == 64);
1739     SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1740     SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1741                                                     MVT::i32, V, SubRegIdx), 0);
1742     return SubVal;
1743   }
1744 
1745   // Depending on the number of groups for a particular value, it might be
1746   // better to rotate, mask explicitly (using andi/andis), and then or the
1747   // result. Select this part of the result first.
1748   void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1749     if (BPermRewriterNoMasking)
1750       return;
1751 
1752     for (ValueRotInfo &VRI : ValueRotsVec) {
1753       unsigned Mask = 0;
1754       for (unsigned i = 0; i < Bits.size(); ++i) {
1755         if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1756           continue;
1757         if (RLAmt[i] != VRI.RLAmt)
1758           continue;
1759         Mask |= (1u << i);
1760       }
1761 
1762       // Compute the masks for andi/andis that would be necessary.
1763       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1764       assert((ANDIMask != 0 || ANDISMask != 0) &&
1765              "No set bits in mask for value bit groups");
1766       bool NeedsRotate = VRI.RLAmt != 0;
1767 
1768       // We're trying to minimize the number of instructions. If we have one
1769       // group, using one of andi/andis can break even.  If we have three
1770       // groups, we can use both andi and andis and break even (to use both
1771       // andi and andis we also need to or the results together). We need four
1772       // groups if we also need to rotate. To use andi/andis we need to do more
1773       // than break even because rotate-and-mask instructions tend to be easier
1774       // to schedule.
1775 
1776       // FIXME: We've biased here against using andi/andis, which is right for
1777       // POWER cores, but not optimal everywhere. For example, on the A2,
1778       // andi/andis have single-cycle latency whereas the rotate-and-mask
1779       // instructions take two cycles, and it would be better to bias toward
1780       // andi/andis in break-even cases.
1781 
1782       unsigned NumAndInsts = (unsigned) NeedsRotate +
1783                              (unsigned) (ANDIMask != 0) +
1784                              (unsigned) (ANDISMask != 0) +
1785                              (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
1786                              (unsigned) (bool) Res;
1787 
1788       LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
1789                         << " RL: " << VRI.RLAmt << ":"
1790                         << "\n\t\t\tisel using masking: " << NumAndInsts
1791                         << " using rotates: " << VRI.NumGroups << "\n");
1792 
1793       if (NumAndInsts >= VRI.NumGroups)
1794         continue;
1795 
1796       LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
1797 
1798       if (InstCnt) *InstCnt += NumAndInsts;
1799 
1800       SDValue VRot;
1801       if (VRI.RLAmt) {
1802         SDValue Ops[] =
1803           { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
1804             getI32Imm(0, dl), getI32Imm(31, dl) };
1805         VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
1806                                               Ops), 0);
1807       } else {
1808         VRot = TruncateToInt32(VRI.V, dl);
1809       }
1810 
1811       SDValue ANDIVal, ANDISVal;
1812       if (ANDIMask != 0)
1813         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1814                             VRot, getI32Imm(ANDIMask, dl)), 0);
1815       if (ANDISMask != 0)
1816         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1817                              VRot, getI32Imm(ANDISMask, dl)), 0);
1818 
1819       SDValue TotalVal;
1820       if (!ANDIVal)
1821         TotalVal = ANDISVal;
1822       else if (!ANDISVal)
1823         TotalVal = ANDIVal;
1824       else
1825         TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1826                              ANDIVal, ANDISVal), 0);
1827 
1828       if (!Res)
1829         Res = TotalVal;
1830       else
1831         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1832                         Res, TotalVal), 0);
1833 
1834       // Now, remove all groups with this underlying value and rotation
1835       // factor.
1836       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1837         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1838       });
1839     }
1840   }
1841 
1842   // Instruction selection for the 32-bit case.
1843   SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
1844     SDLoc dl(N);
1845     SDValue Res;
1846 
1847     if (InstCnt) *InstCnt = 0;
1848 
1849     // Take care of cases that should use andi/andis first.
1850     SelectAndParts32(dl, Res, InstCnt);
1851 
1852     // If we've not yet selected a 'starting' instruction, and we have no zeros
1853     // to fill in, select the (Value, RLAmt) with the highest priority (largest
1854     // number of groups), and start with this rotated value.
1855     if ((!NeedMask || LateMask) && !Res) {
1856       ValueRotInfo &VRI = ValueRotsVec[0];
1857       if (VRI.RLAmt) {
1858         if (InstCnt) *InstCnt += 1;
1859         SDValue Ops[] =
1860           { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
1861             getI32Imm(0, dl), getI32Imm(31, dl) };
1862         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
1863                       0);
1864       } else {
1865         Res = TruncateToInt32(VRI.V, dl);
1866       }
1867 
1868       // Now, remove all groups with this underlying value and rotation factor.
1869       eraseMatchingBitGroups([VRI](const BitGroup &BG) {
1870         return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
1871       });
1872     }
1873 
1874     if (InstCnt) *InstCnt += BitGroups.size();
1875 
1876     // Insert the other groups (one at a time).
1877     for (auto &BG : BitGroups) {
1878       if (!Res) {
1879         SDValue Ops[] =
1880           { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
1881             getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1882             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1883         Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
1884       } else {
1885         SDValue Ops[] =
1886           { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
1887               getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
1888             getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
1889         Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
1890       }
1891     }
1892 
1893     if (LateMask) {
1894       unsigned Mask = (unsigned) getZerosMask();
1895 
1896       unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1897       assert((ANDIMask != 0 || ANDISMask != 0) &&
1898              "No set bits in zeros mask?");
1899 
1900       if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
1901                                (unsigned) (ANDISMask != 0) +
1902                                (unsigned) (ANDIMask != 0 && ANDISMask != 0);
1903 
1904       SDValue ANDIVal, ANDISVal;
1905       if (ANDIMask != 0)
1906         ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
1907                             Res, getI32Imm(ANDIMask, dl)), 0);
1908       if (ANDISMask != 0)
1909         ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
1910                              Res, getI32Imm(ANDISMask, dl)), 0);
1911 
1912       if (!ANDIVal)
1913         Res = ANDISVal;
1914       else if (!ANDISVal)
1915         Res = ANDIVal;
1916       else
1917         Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
1918                         ANDIVal, ANDISVal), 0);
1919     }
1920 
1921     return Res.getNode();
1922   }
1923 
1924   unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
1925                                 unsigned MaskStart, unsigned MaskEnd,
1926                                 bool IsIns) {
1927     // In the notation used by the instructions, 'start' and 'end' are reversed
1928     // because bits are counted from high to low order.
1929     unsigned InstMaskStart = 64 - MaskEnd - 1,
1930              InstMaskEnd   = 64 - MaskStart - 1;
1931 
1932     if (Repl32)
1933       return 1;
1934 
1935     if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
1936         InstMaskEnd == 63 - RLAmt)
1937       return 1;
1938 
1939     return 2;
1940   }
1941 
1942   // For 64-bit values, not all combinations of rotates and masks are
1943   // available. Produce one if it is available.
1944   SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
1945                           bool Repl32, unsigned MaskStart, unsigned MaskEnd,
1946                           unsigned *InstCnt = nullptr) {
1947     // In the notation used by the instructions, 'start' and 'end' are reversed
1948     // because bits are counted from high to low order.
1949     unsigned InstMaskStart = 64 - MaskEnd - 1,
1950              InstMaskEnd   = 64 - MaskStart - 1;
1951 
1952     if (InstCnt) *InstCnt += 1;
1953 
1954     if (Repl32) {
1955       // This rotation amount assumes that the lower 32 bits of the quantity
1956       // are replicated in the high 32 bits by the rotation operator (which is
1957       // done by rlwinm and friends).
1958       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
1959       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
1960       SDValue Ops[] =
1961         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1962           getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
1963       return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
1964                                             Ops), 0);
1965     }
1966 
1967     if (InstMaskEnd == 63) {
1968       SDValue Ops[] =
1969         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1970           getI32Imm(InstMaskStart, dl) };
1971       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
1972     }
1973 
1974     if (InstMaskStart == 0) {
1975       SDValue Ops[] =
1976         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1977           getI32Imm(InstMaskEnd, dl) };
1978       return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
1979     }
1980 
1981     if (InstMaskEnd == 63 - RLAmt) {
1982       SDValue Ops[] =
1983         { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
1984           getI32Imm(InstMaskStart, dl) };
1985       return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
1986     }
1987 
1988     // We cannot do this with a single instruction, so we'll use two. The
1989     // problem is that we're not free to choose both a rotation amount and mask
1990     // start and end independently. We can choose an arbitrary mask start and
1991     // end, but then the rotation amount is fixed. Rotation, however, can be
1992     // inverted, and so by applying an "inverse" rotation first, we can get the
1993     // desired result.
1994     if (InstCnt) *InstCnt += 1;
1995 
1996     // The rotation mask for the second instruction must be MaskStart.
1997     unsigned RLAmt2 = MaskStart;
1998     // The first instruction must rotate V so that the overall rotation amount
1999     // is RLAmt.
2000     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2001     if (RLAmt1)
2002       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2003     return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2004   }
2005 
2006   // For 64-bit values, not all combinations of rotates and masks are
2007   // available. Produce a rotate-mask-and-insert if one is available.
2008   SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2009                              unsigned RLAmt, bool Repl32, unsigned MaskStart,
2010                              unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2011     // In the notation used by the instructions, 'start' and 'end' are reversed
2012     // because bits are counted from high to low order.
2013     unsigned InstMaskStart = 64 - MaskEnd - 1,
2014              InstMaskEnd   = 64 - MaskStart - 1;
2015 
2016     if (InstCnt) *InstCnt += 1;
2017 
2018     if (Repl32) {
2019       // This rotation amount assumes that the lower 32 bits of the quantity
2020       // are replicated in the high 32 bits by the rotation operator (which is
2021       // done by rlwinm and friends).
2022       assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2023       assert(InstMaskEnd   >= 32 && "Mask cannot end out of range");
2024       SDValue Ops[] =
2025         { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2026           getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2027       return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2028                                             Ops), 0);
2029     }
2030 
2031     if (InstMaskEnd == 63 - RLAmt) {
2032       SDValue Ops[] =
2033         { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2034           getI32Imm(InstMaskStart, dl) };
2035       return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2036     }
2037 
2038     // We cannot do this with a single instruction, so we'll use two. The
2039     // problem is that we're not free to choose both a rotation amount and mask
2040     // start and end independently. We can choose an arbitrary mask start and
2041     // end, but then the rotation amount is fixed. Rotation, however, can be
2042     // inverted, and so by applying an "inverse" rotation first, we can get the
2043     // desired result.
2044     if (InstCnt) *InstCnt += 1;
2045 
2046     // The rotation mask for the second instruction must be MaskStart.
2047     unsigned RLAmt2 = MaskStart;
2048     // The first instruction must rotate V so that the overall rotation amount
2049     // is RLAmt.
2050     unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2051     if (RLAmt1)
2052       V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2053     return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2054   }
2055 
  // 64-bit counterpart of SelectAndParts32. For each entry of ValueRotsVec,
  // compare the instruction count of a "rotate, then AND with a mask"
  // sequence against that of the per-group rotate-and-mask/insert sequence.
  // Where masking wins, emit it, OR it into Res (or start Res with it), and
  // erase the bit groups it covered.
  //
  // dl      - location attached to every node created here.
  // Res     - running result; may be null on entry and is updated in place.
  // InstCnt - if non-null, incremented by the number of instructions counted
  //           for each masking sequence that is selected.
  //
  // Does nothing when BPermRewriterNoMasking is set.
  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    if (BPermRewriterNoMasking)
      return;

    // The idea here is the same as in the 32-bit version, but with additional
    // complications from the fact that Repl32 might be true. Because we
    // aggressively convert bit groups to Repl32 form (which, for small
    // rotation factors, involves no other change), and then coalesce, it might
    // be the case that a single 64-bit masking operation could handle both
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
    // form allowed coalescing, then we must use a 32-bit rotation in order to
    // completely capture the new combined bit group.

    for (ValueRotInfo &VRI : ValueRotsVec) {
      uint64_t Mask = 0;

      // We need to add to the mask all bits from the associated bit groups.
      // If Repl32 is false, we need to add bits from bit groups that have
      // Repl32 true, but are trivially convertible to Repl32 false. Such a
      // group is trivially convertible if it overlaps only with the lower 32
      // bits, and the group has not been coalesced.
      auto MatchingBG = [VRI](const BitGroup &BG) {
        if (VRI.V != BG.V)
          return false;

        // Rotation amount the group would have in non-Repl32 form: a group
        // flagged Repl32CR gets 32 added back.
        unsigned EffRLAmt = BG.RLAmt;
        if (!VRI.Repl32 && BG.Repl32) {
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
              !BG.Repl32Coalesced) {
            if (BG.Repl32CR)
              EffRLAmt += 32;
          } else {
            return false;
          }
        } else if (VRI.Repl32 != BG.Repl32) {
          return false;
        }

        return VRI.RLAmt == EffRLAmt;
      };

      // Set a mask bit for every result index covered by a matching group. A
      // group with StartIdx > EndIdx wraps around, so it contributes
      // [StartIdx, Bits.size()) and [0, EndIdx].
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG))
          continue;

        if (BG.StartIdx <= BG.EndIdx) {
          for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        } else {
          for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
            Mask |= (UINT64_C(1) << i);
          for (unsigned i = 0; i <= BG.EndIdx; ++i)
            Mask |= (UINT64_C(1) << i);
        }
      }

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));

      // Cost of the masking approach: an optional rotate, an optional OR into
      // the existing result, plus either the andi/andis (and joining OR)
      // instructions or a materialized 64-bit mask and one AND.
      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (bool) Res;
      if (Use32BitInsts)
        NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
                       (unsigned) (ANDIMask != 0 && ANDISMask != 0);
      else
        NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1;

      // Cost of handling the same groups with rotates. Only the first
      // matching group is counted as a plain rotate-and-mask; the rest are
      // counted as inserts. MoreBG records that at least one group is not
      // covered by this entry.
      unsigned NumRLInsts = 0;
      bool FirstBG = true;
      bool MoreBG = false;
      for (auto &BG : BitGroups) {
        if (!MatchingBG(BG)) {
          MoreBG = true;
          continue;
        }
        NumRLInsts +=
          SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
                               !FirstBG);
        FirstBG = false;
      }

      LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
                        << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
                        << "\n\t\t\tisel using masking: " << NumAndInsts
                        << " using rotates: " << NumRLInsts << "\n");

      // When we'd use andi/andis, we bias toward using the rotates (andi only
      // has a record form, and is cracked on POWER cores). However, when using
      // general 64-bit constant formation, bias toward the constant form,
      // because that exposes more opportunities for CSE.
      if (NumAndInsts > NumRLInsts)
        continue;
      // When merging multiple bit groups, instruction or is used.
      // But when rotate is used, rldimi can insert the rotated value into any
      // register, so instruction or can be avoided.
      if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
        continue;

      LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      SDValue VRot;
      // We actually need to generate a rotation if we have a non-zero rotation
      // factor or, in the Repl32 case, if we care about any of the
      // higher-order replicated bits. In the latter case, we generate a mask
      // backward so that it actually includes the entire 64 bits.
      if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
        VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
      else
        VRot = VRI.V;

      SDValue TotalVal;
      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // Mask the low and high 16-bit halves separately, then join them if
        // both are present.
        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
                                                   ExtendToInt64(VRot, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
                                                    ExtendToInt64(VRot, dl),
                                                    getI32Imm(ANDISMask, dl)),
                             0);

        if (!ANDIVal)
          TotalVal = ANDISVal;
        else if (!ANDISVal)
          TotalVal = ANDIVal;
        else
          TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                               ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the full 64-bit mask and AND with it.
        TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
        TotalVal =
          SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                         ExtendToInt64(VRot, dl), TotalVal),
                  0);
     }

      // Start the result with this value, or OR it into what is already
      // there.
      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), TotalVal),
                      0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups(MatchingBG);
    }
  }
2220 
2221   // Instruction selection for the 64-bit case.
2222   SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2223     SDLoc dl(N);
2224     SDValue Res;
2225 
2226     if (InstCnt) *InstCnt = 0;
2227 
2228     // Take care of cases that should use andi/andis first.
2229     SelectAndParts64(dl, Res, InstCnt);
2230 
2231     // If we've not yet selected a 'starting' instruction, and we have no zeros
2232     // to fill in, select the (Value, RLAmt) with the highest priority (largest
2233     // number of groups), and start with this rotated value.
2234     if ((!NeedMask || LateMask) && !Res) {
2235       // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2236       // groups will come first, and so the VRI representing the largest number
2237       // of groups might not be first (it might be the first Repl32 groups).
2238       unsigned MaxGroupsIdx = 0;
2239       if (!ValueRotsVec[0].Repl32) {
2240         for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2241           if (ValueRotsVec[i].Repl32) {
2242             if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2243               MaxGroupsIdx = i;
2244             break;
2245           }
2246       }
2247 
2248       ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2249       bool NeedsRotate = false;
2250       if (VRI.RLAmt) {
2251         NeedsRotate = true;
2252       } else if (VRI.Repl32) {
2253         for (auto &BG : BitGroups) {
2254           if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2255               BG.Repl32 != VRI.Repl32)
2256             continue;
2257 
2258           // We don't need a rotate if the bit group is confined to the lower
2259           // 32 bits.
2260           if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2261             continue;
2262 
2263           NeedsRotate = true;
2264           break;
2265         }
2266       }
2267 
2268       if (NeedsRotate)
2269         Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2270                               VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2271                               InstCnt);
2272       else
2273         Res = VRI.V;
2274 
2275       // Now, remove all groups with this underlying value and rotation factor.
2276       if (Res)
2277         eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2278           return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2279                  BG.Repl32 == VRI.Repl32;
2280         });
2281     }
2282 
2283     // Because 64-bit rotates are more flexible than inserts, we might have a
2284     // preference regarding which one we do first (to save one instruction).
2285     if (!Res)
2286       for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2287         if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2288                                 false) <
2289             SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2290                                 true)) {
2291           if (I != BitGroups.begin()) {
2292             BitGroup BG = *I;
2293             BitGroups.erase(I);
2294             BitGroups.insert(BitGroups.begin(), BG);
2295           }
2296 
2297           break;
2298         }
2299       }
2300 
2301     // Insert the other groups (one at a time).
2302     for (auto &BG : BitGroups) {
2303       if (!Res)
2304         Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2305                               BG.EndIdx, InstCnt);
2306       else
2307         Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2308                                  BG.StartIdx, BG.EndIdx, InstCnt);
2309     }
2310 
2311     if (LateMask) {
2312       uint64_t Mask = getZerosMask();
2313 
2314       // We can use the 32-bit andi/andis technique if the mask does not
2315       // require any higher-order bits. This can save an instruction compared
2316       // to always using the general 64-bit technique.
2317       bool Use32BitInsts = isUInt<32>(Mask);
2318       // Compute the masks for andi/andis that would be necessary.
2319       unsigned ANDIMask = (Mask & UINT16_MAX),
2320                ANDISMask = (Mask >> 16) & UINT16_MAX;
2321 
2322       if (Use32BitInsts) {
2323         assert((ANDIMask != 0 || ANDISMask != 0) &&
2324                "No set bits in mask when using 32-bit ands for 64-bit value");
2325 
2326         if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2327                                  (unsigned) (ANDISMask != 0) +
2328                                  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2329 
2330         SDValue ANDIVal, ANDISVal;
2331         if (ANDIMask != 0)
2332           ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64,
2333                               ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0);
2334         if (ANDISMask != 0)
2335           ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64,
2336                                ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0);
2337 
2338         if (!ANDIVal)
2339           Res = ANDISVal;
2340         else if (!ANDISVal)
2341           Res = ANDIVal;
2342         else
2343           Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2344                           ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2345       } else {
2346         if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1;
2347 
2348         SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2349         Res =
2350           SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2351                                          ExtendToInt64(Res, dl), MaskVal), 0);
2352       }
2353     }
2354 
2355     return Res.getNode();
2356   }
2357 
2358   SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2359     // Fill in BitGroups.
2360     collectBitGroups(LateMask);
2361     if (BitGroups.empty())
2362       return nullptr;
2363 
2364     // For 64-bit values, figure out when we can use 32-bit instructions.
2365     if (Bits.size() == 64)
2366       assignRepl32BitGroups();
2367 
2368     // Fill in ValueRotsVec.
2369     collectValueRotInfo();
2370 
2371     if (Bits.size() == 32) {
2372       return Select32(N, LateMask, InstCnt);
2373     } else {
2374       assert(Bits.size() == 64 && "Not 64 bits here?");
2375       return Select64(N, LateMask, InstCnt);
2376     }
2377 
2378     return nullptr;
2379   }
2380 
2381   void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2382     BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
2383   }
2384 
  // Per-bit description of the value being selected. Assigned by the public
  // Select() from the result of getValueBits().
  SmallVector<ValueBit, 64> Bits;

  // Both are filled in by computeRotationAmounts(). When NeedMask is false
  // the public Select() skips the early/late masking comparison.
  bool NeedMask;
  SmallVector<unsigned, 64> RLAmt;

  // Work list of bit groups; filled by collectBitGroups() and consumed
  // (erased from) while Select64 emits instructions.
  SmallVector<BitGroup, 16> BitGroups;

  // Rotation information per value. ValueRotsVec is filled by
  // collectValueRotInfo(); ValueRots is presumably keyed by
  // (value, rotation amount) -- NOTE(review): confirm against
  // collectValueRotInfo().
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  SmallVector<ValueRotInfo, 16> ValueRotsVec;

  // DAG in which the replacement machine nodes are created.
  SelectionDAG *CurDAG;
2396 
2397 public:
2398   BitPermutationSelector(SelectionDAG *DAG)
2399     : CurDAG(DAG) {}
2400 
2401   // Here we try to match complex bit permutations into a set of
2402   // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2403   // known to produce optimal code for common cases (like i32 byte swapping).
2404   SDNode *Select(SDNode *N) {
2405     Memoizer.clear();
2406     auto Result =
2407         getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2408     if (!Result.first)
2409       return nullptr;
2410     Bits = std::move(*Result.second);
2411 
2412     LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2413                          " selection for:    ");
2414     LLVM_DEBUG(N->dump(CurDAG));
2415 
2416     // Fill it RLAmt and set NeedMask.
2417     computeRotationAmounts();
2418 
2419     if (!NeedMask)
2420       return Select(N, false);
2421 
2422     // We currently have two techniques for handling results with zeros: early
2423     // masking (the default) and late masking. Late masking is sometimes more
2424     // efficient, but because the structure of the bit groups is different, it
2425     // is hard to tell without generating both and comparing the results. With
2426     // late masking, we ignore zeros in the resulting value when inserting each
2427     // set of bit groups, and then mask in the zeros at the end. With early
2428     // masking, we only insert the non-zero parts of the result at every step.
2429 
2430     unsigned InstCnt = 0, InstCntLateMask = 0;
2431     LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2432     SDNode *RN = Select(N, false, &InstCnt);
2433     LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2434 
2435     LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2436     SDNode *RNLM = Select(N, true, &InstCntLateMask);
2437     LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2438                       << " instructions\n");
2439 
2440     if (InstCnt <= InstCntLateMask) {
2441       LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2442       return RN;
2443     }
2444 
2445     LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2446     return RNLM;
2447   }
2448 };
2449 
2450 class IntegerCompareEliminator {
2451   SelectionDAG *CurDAG;
2452   PPCDAGToDAGISel *S;
2453   // Conversion type for interpreting results of a 32-bit instruction as
2454   // a 64-bit value or vice versa.
2455   enum ExtOrTruncConversion { Ext, Trunc };
2456 
2457   // Modifiers to guide how an ISD::SETCC node's result is to be computed
2458   // in a GPR.
2459   // ZExtOrig - use the original condition code, zero-extend value
2460   // ZExtInvert - invert the condition code, zero-extend value
2461   // SExtOrig - use the original condition code, sign-extend value
2462   // SExtInvert - invert the condition code, sign-extend value
2463   enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2464 
2465   // Comparisons against zero to emit GPR code sequences for. Each of these
2466   // sequences may need to be emitted for two or more equivalent patterns.
2467   // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2468   // matters as well as the extension type: sext (-1/0), zext (1/0).
2469   // GEZExt - (zext (LHS >= 0))
2470   // GESExt - (sext (LHS >= 0))
2471   // LEZExt - (zext (LHS <= 0))
2472   // LESExt - (sext (LHS <= 0))
2473   enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2474 
2475   SDNode *tryEXTEND(SDNode *N);
2476   SDNode *tryLogicOpOfCompares(SDNode *N);
2477   SDValue computeLogicOpInGPR(SDValue LogicOp);
2478   SDValue signExtendInputIfNeeded(SDValue Input);
2479   SDValue zeroExtendInputIfNeeded(SDValue Input);
2480   SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2481   SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2482                                         ZeroCompare CmpTy);
2483   SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2484                               int64_t RHSValue, SDLoc dl);
2485  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2486                               int64_t RHSValue, SDLoc dl);
2487   SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2488                               int64_t RHSValue, SDLoc dl);
2489   SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2490                               int64_t RHSValue, SDLoc dl);
2491   SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2492 
2493 public:
2494   IntegerCompareEliminator(SelectionDAG *DAG,
2495                            PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2496     assert(CurDAG->getTargetLoweringInfo()
2497            .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2498            "Only expecting to use this on 64 bit targets.");
2499   }
2500   SDNode *Select(SDNode *N) {
2501     if (CmpInGPR == ICGPR_None)
2502       return nullptr;
2503     switch (N->getOpcode()) {
2504     default: break;
2505     case ISD::ZERO_EXTEND:
2506       if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2507           CmpInGPR == ICGPR_SextI64)
2508         return nullptr;
2509       LLVM_FALLTHROUGH;
2510     case ISD::SIGN_EXTEND:
2511       if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2512           CmpInGPR == ICGPR_ZextI64)
2513         return nullptr;
2514       return tryEXTEND(N);
2515     case ISD::AND:
2516     case ISD::OR:
2517     case ISD::XOR:
2518       return tryLogicOpOfCompares(N);
2519     }
2520     return nullptr;
2521   }
2522 };
2523 
2524 static bool isLogicOp(unsigned Opc) {
2525   return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2526 }
2527 // The obvious case for wanting to keep the value in a GPR. Namely, the
2528 // result of the comparison is actually needed in a GPR.
2529 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2530   assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2531           N->getOpcode() == ISD::SIGN_EXTEND) &&
2532          "Expecting a zero/sign extend node!");
2533   SDValue WideRes;
2534   // If we are zero-extending the result of a logical operation on i1
2535   // values, we can keep the values in GPRs.
2536   if (isLogicOp(N->getOperand(0).getOpcode()) &&
2537       N->getOperand(0).getValueType() == MVT::i1 &&
2538       N->getOpcode() == ISD::ZERO_EXTEND)
2539     WideRes = computeLogicOpInGPR(N->getOperand(0));
2540   else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2541     return nullptr;
2542   else
2543     WideRes =
2544       getSETCCInGPR(N->getOperand(0),
2545                     N->getOpcode() == ISD::SIGN_EXTEND ?
2546                     SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2547 
2548   if (!WideRes)
2549     return nullptr;
2550 
2551   SDLoc dl(N);
2552   bool Input32Bit = WideRes.getValueType() == MVT::i32;
2553   bool Output32Bit = N->getValueType(0) == MVT::i32;
2554 
2555   NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2556   NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2557 
2558   SDValue ConvOp = WideRes;
2559   if (Input32Bit != Output32Bit)
2560     ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2561                            ExtOrTruncConversion::Trunc);
2562   return ConvOp.getNode();
2563 }
2564 
2565 // Attempt to perform logical operations on the results of comparisons while
2566 // keeping the values in GPRs. Without doing so, these would end up being
2567 // lowered to CR-logical operations which suffer from significant latency and
2568 // low ILP.
2569 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2570   if (N->getValueType(0) != MVT::i1)
2571     return nullptr;
2572   assert(isLogicOp(N->getOpcode()) &&
2573          "Expected a logic operation on setcc results.");
2574   SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2575   if (!LoweredLogical)
2576     return nullptr;
2577 
2578   SDLoc dl(N);
2579   bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
2580   unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
2581   SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
2582   SDValue LHS = LoweredLogical.getOperand(0);
2583   SDValue RHS = LoweredLogical.getOperand(1);
2584   SDValue WideOp;
2585   SDValue OpToConvToRecForm;
2586 
2587   // Look through any 32-bit to 64-bit implicit extend nodes to find the
2588   // opcode that is input to the XORI.
2589   if (IsBitwiseNegate &&
2590       LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
2591     OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
2592   else if (IsBitwiseNegate)
2593     // If the input to the XORI isn't an extension, that's what we're after.
2594     OpToConvToRecForm = LoweredLogical.getOperand(0);
2595   else
2596     // If this is not an XORI, it is a reg-reg logical op and we can convert
2597     // it to record-form.
2598     OpToConvToRecForm = LoweredLogical;
2599 
2600   // Get the record-form version of the node we're looking to use to get the
2601   // CR result from.
2602   uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
2603   int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
2604 
2605   // Convert the right node to record-form. This is either the logical we're
2606   // looking at or it is the input node to the negation (if we're looking at
2607   // a bitwise negation).
2608   if (NewOpc != -1 && IsBitwiseNegate) {
2609     // The input to the XORI has a record-form. Use it.
2610     assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
2611            "Expected a PPC::XORI8 only for bitwise negation.");
2612     // Emit the record-form instruction.
2613     std::vector<SDValue> Ops;
2614     for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
2615       Ops.push_back(OpToConvToRecForm.getOperand(i));
2616 
2617     WideOp =
2618       SDValue(CurDAG->getMachineNode(NewOpc, dl,
2619                                      OpToConvToRecForm.getValueType(),
2620                                      MVT::Glue, Ops), 0);
2621   } else {
2622     assert((NewOpc != -1 || !IsBitwiseNegate) &&
2623            "No record form available for AND8/OR8/XOR8?");
2624     WideOp =
2625       SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
2626                                      MVT::i64, MVT::Glue, LHS, RHS), 0);
2627   }
2628 
2629   // Select this node to a single bit from CR0 set by the record-form node
2630   // just created. For bitwise negation, use the EQ bit which is the equivalent
2631   // of negating the result (i.e. it is a bit set when the result of the
2632   // operation is zero).
2633   SDValue SRIdxVal =
2634     CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
2635   SDValue CRBit =
2636     SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
2637                                    MVT::i1, CR0Reg, SRIdxVal,
2638                                    WideOp.getValue(1)), 0);
2639   return CRBit.getNode();
2640 }
2641 
2642 // Lower a logical operation on i1 values into a GPR sequence if possible.
2643 // The result can be kept in a GPR if requested.
2644 // Three types of inputs can be handled:
2645 // - SETCC
2646 // - TRUNCATE
2647 // - Logical operation (AND/OR/XOR)
2648 // There is also a special case that is handled (namely a complement operation
2649 // achieved with xor %a, -1).
2650 SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
2651   assert(isLogicOp(LogicOp.getOpcode()) &&
2652         "Can only handle logic operations here.");
2653   assert(LogicOp.getValueType() == MVT::i1 &&
2654          "Can only handle logic operations on i1 values here.");
2655   SDLoc dl(LogicOp);
2656   SDValue LHS, RHS;
2657 
2658  // Special case: xor %a, -1
2659   bool IsBitwiseNegation = isBitwiseNot(LogicOp);
2660 
2661   // Produces a GPR sequence for each operand of the binary logic operation.
2662   // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
2663   // the value in a GPR and for logic operations, it will recursively produce
2664   // a GPR sequence for the operation.
2665  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
2666     unsigned OperandOpcode = Operand.getOpcode();
2667     if (OperandOpcode == ISD::SETCC)
2668       return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
2669     else if (OperandOpcode == ISD::TRUNCATE) {
2670       SDValue InputOp = Operand.getOperand(0);
2671      EVT InVT = InputOp.getValueType();
2672       return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
2673                                             PPC::RLDICL, dl, InVT, InputOp,
2674                                             S->getI64Imm(0, dl),
2675                                             S->getI64Imm(63, dl)), 0);
2676     } else if (isLogicOp(OperandOpcode))
2677       return computeLogicOpInGPR(Operand);
2678     return SDValue();
2679   };
2680   LHS = getLogicOperand(LogicOp.getOperand(0));
2681   RHS = getLogicOperand(LogicOp.getOperand(1));
2682 
2683   // If a GPR sequence can't be produced for the LHS we can't proceed.
2684   // Not producing a GPR sequence for the RHS is only a problem if this isn't
2685   // a bitwise negation operation.
2686   if (!LHS || (!RHS && !IsBitwiseNegation))
2687     return SDValue();
2688 
2689   NumLogicOpsOnComparison++;
2690 
2691   // We will use the inputs as 64-bit values.
2692   if (LHS.getValueType() == MVT::i32)
2693     LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
2694   if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
2695     RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
2696 
2697   unsigned NewOpc;
2698   switch (LogicOp.getOpcode()) {
2699   default: llvm_unreachable("Unknown logic operation.");
2700   case ISD::AND: NewOpc = PPC::AND8; break;
2701   case ISD::OR:  NewOpc = PPC::OR8;  break;
2702   case ISD::XOR: NewOpc = PPC::XOR8; break;
2703   }
2704 
2705   if (IsBitwiseNegation) {
2706     RHS = S->getI64Imm(1, dl);
2707     NewOpc = PPC::XORI8;
2708   }
2709 
2710   return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
2711 
2712 }
2713 
2714 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2715 /// Otherwise just reinterpret it as a 64-bit value.
2716 /// Useful when emitting comparison code for 32-bit values without using
2717 /// the compare instruction (which only considers the lower 32-bits).
2718 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2719   assert(Input.getValueType() == MVT::i32 &&
2720          "Can only sign-extend 32-bit values here.");
2721   unsigned Opc = Input.getOpcode();
2722 
2723   // The value was sign extended and then truncated to 32-bits. No need to
2724   // sign extend it again.
2725   if (Opc == ISD::TRUNCATE &&
2726       (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2727        Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2728     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2729 
2730   LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2731   // The input is a sign-extending load. All ppc sign-extending loads
2732   // sign-extend to the full 64-bits.
2733   if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
2734     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2735 
2736   ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2737   // We don't sign-extend constants.
2738   if (InputConst)
2739     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2740 
2741   SDLoc dl(Input);
2742   SignExtensionsAdded++;
2743   return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
2744                                         MVT::i64, Input), 0);
2745 }
2746 
2747 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2748 /// Otherwise just reinterpret it as a 64-bit value.
2749 /// Useful when emitting comparison code for 32-bit values without using
2750 /// the compare instruction (which only considers the lower 32-bits).
2751 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
2752   assert(Input.getValueType() == MVT::i32 &&
2753          "Can only zero-extend 32-bit values here.");
2754   unsigned Opc = Input.getOpcode();
2755 
2756   // The only condition under which we can omit the actual extend instruction:
2757   // - The value is a positive constant
2758   // - The value comes from a load that isn't a sign-extending load
2759   // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
2760   bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
2761     (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
2762      Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
2763   if (IsTruncateOfZExt)
2764     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2765 
2766   ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2767   if (InputConst && InputConst->getSExtValue() >= 0)
2768     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2769 
2770   LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2771   // The input is a load that doesn't sign-extend (it will be zero-extended).
2772   if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
2773     return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2774 
2775   // None of the above, need to zero-extend.
2776   SDLoc dl(Input);
2777   ZeroExtensionsAdded++;
2778   return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
2779                                         S->getI64Imm(0, dl),
2780                                         S->getI64Imm(32, dl)), 0);
2781 }
2782 
2783 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
2784 // course not actual zero/sign extensions that will generate machine code,
2785 // they're just a way to reinterpret a 32 bit value in a register as a
2786 // 64 bit value and vice-versa.
2787 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
2788                                                 ExtOrTruncConversion Conv) {
2789   SDLoc dl(NatWidthRes);
2790 
2791   // For reinterpreting 32-bit values as 64 bit values, we generate
2792   // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
2793   if (Conv == ExtOrTruncConversion::Ext) {
2794     SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
2795     SDValue SubRegIdx =
2796       CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2797     return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
2798                                           ImDef, NatWidthRes, SubRegIdx), 0);
2799   }
2800 
2801   assert(Conv == ExtOrTruncConversion::Trunc &&
2802          "Unknown convertion between 32 and 64 bit values.");
2803   // For reinterpreting 64-bit values as 32-bit values, we just need to
2804   // EXTRACT_SUBREG (i.e. extract the low word).
2805   SDValue SubRegIdx =
2806     CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2807   return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
2808                                         NatWidthRes, SubRegIdx), 0);
2809 }
2810 
2811 // Produce a GPR sequence for compound comparisons (<=, >=) against zero.
2812 // Handle both zero-extensions and sign-extensions.
2813 SDValue
2814 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2815                                                          ZeroCompare CmpTy) {
2816   EVT InVT = LHS.getValueType();
2817   bool Is32Bit = InVT == MVT::i32;
2818   SDValue ToExtend;
2819 
2820   // Produce the value that needs to be either zero or sign extended.
2821   switch (CmpTy) {
2822   case ZeroCompare::GEZExt:
2823   case ZeroCompare::GESExt:
2824     ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
2825                                               dl, InVT, LHS, LHS), 0);
2826     break;
2827   case ZeroCompare::LEZExt:
2828   case ZeroCompare::LESExt: {
2829     if (Is32Bit) {
2830       // Upper 32 bits cannot be undefined for this sequence.
2831       LHS = signExtendInputIfNeeded(LHS);
2832       SDValue Neg =
2833         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2834       ToExtend =
2835         SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2836                                        Neg, S->getI64Imm(1, dl),
2837                                        S->getI64Imm(63, dl)), 0);
2838     } else {
2839       SDValue Addi =
2840         SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
2841                                        S->getI64Imm(~0ULL, dl)), 0);
2842       ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2843                                                 Addi, LHS), 0);
2844     }
2845     break;
2846   }
2847   }
2848 
2849   // For 64-bit sequences, the extensions are the same for the GE/LE cases.
2850   if (!Is32Bit &&
2851       (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
2852     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2853                                           ToExtend, S->getI64Imm(1, dl),
2854                                           S->getI64Imm(63, dl)), 0);
2855   if (!Is32Bit &&
2856       (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
2857     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
2858                                           S->getI64Imm(63, dl)), 0);
2859 
2860   assert(Is32Bit && "Should have handled the 32-bit sequences above.");
2861   // For 32-bit sequences, the extensions differ between GE/LE cases.
2862   switch (CmpTy) {
2863   case ZeroCompare::GEZExt: {
2864     SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
2865                            S->getI32Imm(31, dl) };
2866     return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2867                                           ShiftOps), 0);
2868   }
2869   case ZeroCompare::GESExt:
2870     return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
2871                                           S->getI32Imm(31, dl)), 0);
2872   case ZeroCompare::LEZExt:
2873     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
2874                                           S->getI32Imm(1, dl)), 0);
2875   case ZeroCompare::LESExt:
2876     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
2877                                           S->getI32Imm(-1, dl)), 0);
2878   }
2879 
2880   // The above case covers all the enumerators so it can't have a default clause
2881   // to avoid compiler warnings.
2882   llvm_unreachable("Unknown zero-comparison type.");
2883 }
2884 
2885 /// Produces a zero-extended result of comparing two 32-bit values according to
2886 /// the passed condition code.
2887 SDValue
2888 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
2889                                               ISD::CondCode CC,
2890                                               int64_t RHSValue, SDLoc dl) {
2891   if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
2892       CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
2893     return SDValue();
2894   bool IsRHSZero = RHSValue == 0;
2895   bool IsRHSOne = RHSValue == 1;
2896   bool IsRHSNegOne = RHSValue == -1LL;
2897   switch (CC) {
2898   default: return SDValue();
2899   case ISD::SETEQ: {
2900     // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
2901     // (zext (setcc %a, 0, seteq))  -> (lshr (cntlzw %a), 5)
2902     SDValue Xor = IsRHSZero ? LHS :
2903       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2904     SDValue Clz =
2905       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2906     SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2907       S->getI32Imm(31, dl) };
2908     return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2909                                           ShiftOps), 0);
2910   }
2911   case ISD::SETNE: {
2912     // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
2913     // (zext (setcc %a, 0, setne))  -> (xor (lshr (cntlzw %a), 5), 1)
2914     SDValue Xor = IsRHSZero ? LHS :
2915       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
2916     SDValue Clz =
2917       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
2918     SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
2919       S->getI32Imm(31, dl) };
2920     SDValue Shift =
2921       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
2922     return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
2923                                           S->getI32Imm(1, dl)), 0);
2924   }
2925   case ISD::SETGE: {
2926     // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
2927     // (zext (setcc %a, 0, setge))  -> (lshr (~ %a), 31)
2928     if(IsRHSZero)
2929       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2930 
2931     // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
2932     // by swapping inputs and falling through.
2933     std::swap(LHS, RHS);
2934     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2935     IsRHSZero = RHSConst && RHSConst->isNullValue();
2936     LLVM_FALLTHROUGH;
2937   }
2938   case ISD::SETLE: {
2939     if (CmpInGPR == ICGPR_NonExtIn)
2940       return SDValue();
2941     // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
2942     // (zext (setcc %a, 0, setle))  -> (xor (lshr (- %a), 63), 1)
2943     if(IsRHSZero) {
2944       if (CmpInGPR == ICGPR_NonExtIn)
2945         return SDValue();
2946       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2947     }
2948 
2949     // The upper 32-bits of the register can't be undefined for this sequence.
2950     LHS = signExtendInputIfNeeded(LHS);
2951     RHS = signExtendInputIfNeeded(RHS);
2952     SDValue Sub =
2953       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
2954     SDValue Shift =
2955       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
2956                                      S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
2957               0);
2958     return
2959       SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
2960                                      MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
2961   }
2962   case ISD::SETGT: {
2963     // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
2964     // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
2965     // (zext (setcc %a, 0, setgt))  -> (lshr (- %a), 63)
2966     // Handle SETLT -1 (which is equivalent to SETGE 0).
2967     if (IsRHSNegOne)
2968       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
2969 
2970     if (IsRHSZero) {
2971       if (CmpInGPR == ICGPR_NonExtIn)
2972         return SDValue();
2973       // The upper 32-bits of the register can't be undefined for this sequence.
2974       LHS = signExtendInputIfNeeded(LHS);
2975       RHS = signExtendInputIfNeeded(RHS);
2976       SDValue Neg =
2977         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
2978       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
2979                      Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
2980     }
2981     // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
2982     // (%b < %a) by swapping inputs and falling through.
2983     std::swap(LHS, RHS);
2984     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
2985     IsRHSZero = RHSConst && RHSConst->isNullValue();
2986     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
2987     LLVM_FALLTHROUGH;
2988   }
2989   case ISD::SETLT: {
2990     // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
2991     // (zext (setcc %a, 1, setlt))  -> (xor (lshr (- %a), 63), 1)
2992     // (zext (setcc %a, 0, setlt))  -> (lshr %a, 31)
2993     // Handle SETLT 1 (which is equivalent to SETLE 0).
2994     if (IsRHSOne) {
2995       if (CmpInGPR == ICGPR_NonExtIn)
2996         return SDValue();
2997       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
2998     }
2999 
3000     if (IsRHSZero) {
3001       SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3002                              S->getI32Imm(31, dl) };
3003       return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3004                                             ShiftOps), 0);
3005     }
3006 
3007     if (CmpInGPR == ICGPR_NonExtIn)
3008       return SDValue();
3009     // The upper 32-bits of the register can't be undefined for this sequence.
3010     LHS = signExtendInputIfNeeded(LHS);
3011     RHS = signExtendInputIfNeeded(RHS);
3012     SDValue SUBFNode =
3013       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3014     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3015                                     SUBFNode, S->getI64Imm(1, dl),
3016                                     S->getI64Imm(63, dl)), 0);
3017   }
3018   case ISD::SETUGE:
3019     // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3020     // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3021     std::swap(LHS, RHS);
3022     LLVM_FALLTHROUGH;
3023   case ISD::SETULE: {
3024     if (CmpInGPR == ICGPR_NonExtIn)
3025       return SDValue();
3026     // The upper 32-bits of the register can't be undefined for this sequence.
3027     LHS = zeroExtendInputIfNeeded(LHS);
3028     RHS = zeroExtendInputIfNeeded(RHS);
3029     SDValue Subtract =
3030       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3031     SDValue SrdiNode =
3032       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3033                                           Subtract, S->getI64Imm(1, dl),
3034                                           S->getI64Imm(63, dl)), 0);
3035     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3036                                             S->getI32Imm(1, dl)), 0);
3037   }
3038   case ISD::SETUGT:
3039     // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3040     // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3041     std::swap(LHS, RHS);
3042     LLVM_FALLTHROUGH;
3043   case ISD::SETULT: {
3044     if (CmpInGPR == ICGPR_NonExtIn)
3045       return SDValue();
3046     // The upper 32-bits of the register can't be undefined for this sequence.
3047     LHS = zeroExtendInputIfNeeded(LHS);
3048     RHS = zeroExtendInputIfNeeded(RHS);
3049     SDValue Subtract =
3050       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3051     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3052                                           Subtract, S->getI64Imm(1, dl),
3053                                           S->getI64Imm(63, dl)), 0);
3054   }
3055   }
3056 }
3057 
3058 /// Produces a sign-extended result of comparing two 32-bit values according to
3059 /// the passed condition code (an empty SDValue means nothing was emitted).
3060 SDValue
3061 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3062                                               ISD::CondCode CC,
3063                                               int64_t RHSValue, SDLoc dl) {
3064   if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3065       CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext)
3066     return SDValue(); // Not attempted under these CmpInGPR settings.
3067   bool IsRHSZero = RHSValue == 0;
3068   bool IsRHSOne = RHSValue == 1;
3069   bool IsRHSNegOne = RHSValue == -1LL;
3070 
3071   switch (CC) {
3072   default: return SDValue();
3073   case ISD::SETEQ: {
3074     // (sext (setcc %a, %b, seteq)) ->
3075     //   (neg (lshr (ctlz (xor %a, %b)), 5))
3076     // (sext (setcc %a, 0, seteq)) ->
3077     //   (neg (lshr (ctlz %a), 5))
3078     SDValue CountInput = IsRHSZero ? LHS :
3079       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3080     SDValue Cntlzw =
3081       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3082     SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3083                          S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3084     SDValue Slwi = // Same RLWINM operands as the right shift by 5 in SETNE.
3085       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3086     return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3087   }
3088   case ISD::SETNE: {
3089     // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3090     // flip the bit, finally take 2's complement.
3091     // (sext (setcc %a, %b, setne)) ->
3092     //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3093     // Same as above, but the first xor is not needed.
3094     // (sext (setcc %a, 0, setne)) ->
3095     //   (neg (xor (lshr (ctlz %a), 5), 1))
3096     SDValue Xor = IsRHSZero ? LHS :
3097       SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3098     SDValue Clz =
3099       SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3100     SDValue ShiftOps[] =
3101       { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3102     SDValue Shift =
3103       SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3104     SDValue Xori =
3105       SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3106                                      S->getI32Imm(1, dl)), 0);
3107     return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3108   }
3109   case ISD::SETGE: {
3110     // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3111     // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
3112     if (IsRHSZero)
3113       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3114 
3115     // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3116     // by swapping inputs and falling through.
3117     std::swap(LHS, RHS);
3118     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3119     IsRHSZero = RHSConst && RHSConst->isNullValue();
3120     LLVM_FALLTHROUGH;
3121   }
3122   case ISD::SETLE: {
3123     if (CmpInGPR == ICGPR_NonExtIn)
3124       return SDValue();
3125     // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3126     // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
3127     if (IsRHSZero)
3128       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3129 
3130     // The upper 32-bits of the register can't be undefined for this sequence.
3131     LHS = signExtendInputIfNeeded(LHS);
3132     RHS = signExtendInputIfNeeded(RHS);
3133     SDValue SUBFNode = // NOTE(review): Glue is requested, only value 0 is used.
3134       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3135                                      LHS, RHS), 0);
3136     SDValue Srdi =
3137       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3138                                      SUBFNode, S->getI64Imm(1, dl),
3139                                      S->getI64Imm(63, dl)), 0);
3140     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3141                                           S->getI32Imm(-1, dl)), 0);
3142   }
3143   case ISD::SETGT: {
3144     // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3145     // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3146     // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
3147     if (IsRHSNegOne)
3148       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3149     if (IsRHSZero) {
3150       if (CmpInGPR == ICGPR_NonExtIn)
3151         return SDValue();
3152       // The upper 32-bits of the register can't be undefined for this sequence.
3153       LHS = signExtendInputIfNeeded(LHS);
3154       RHS = signExtendInputIfNeeded(RHS); // NOTE(review): RHS is not read again here.
3155       SDValue Neg =
3156         SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3157         return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3158                                               S->getI64Imm(63, dl)), 0);
3159     }
3160     // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3161     // (%b < %a) by swapping inputs and falling through.
3162     std::swap(LHS, RHS);
3163     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3164     IsRHSZero = RHSConst && RHSConst->isNullValue();
3165     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3166     LLVM_FALLTHROUGH;
3167   }
3168   case ISD::SETLT: {
3169     // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3170     // (sext (setcc %a, 1, setlt))  -> (add (lshr (- %a), 63), -1)
3171     // (sext (setcc %a, 0, setlt))  -> (ashr %a, 31)
3172     if (IsRHSOne) {
3173       if (CmpInGPR == ICGPR_NonExtIn)
3174         return SDValue();
3175       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3176     }
3177     if (IsRHSZero)
3178       return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3179                                             S->getI32Imm(31, dl)), 0);
3180 
3181     if (CmpInGPR == ICGPR_NonExtIn)
3182       return SDValue();
3183     // The upper 32-bits of the register can't be undefined for this sequence.
3184     LHS = signExtendInputIfNeeded(LHS);
3185     RHS = signExtendInputIfNeeded(RHS);
3186     SDValue SUBFNode =
3187       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3188     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3189                                           SUBFNode, S->getI64Imm(63, dl)), 0);
3190   }
3191   case ISD::SETUGE:
3192     // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3193     // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3194     std::swap(LHS, RHS);
3195     LLVM_FALLTHROUGH;
3196   case ISD::SETULE: {
3197     if (CmpInGPR == ICGPR_NonExtIn)
3198       return SDValue();
3199     // The upper 32-bits of the register can't be undefined for this sequence.
3200     LHS = zeroExtendInputIfNeeded(LHS);
3201     RHS = zeroExtendInputIfNeeded(RHS);
3202     SDValue Subtract =
3203       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3204     SDValue Shift =
3205       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3206                                      S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3207               0);
3208     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3209                                           S->getI32Imm(-1, dl)), 0);
3210   }
3211   case ISD::SETUGT:
3212     // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3213     // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3214     std::swap(LHS, RHS);
3215     LLVM_FALLTHROUGH;
3216   case ISD::SETULT: {
3217     if (CmpInGPR == ICGPR_NonExtIn)
3218       return SDValue();
3219     // The upper 32-bits of the register can't be undefined for this sequence.
3220     LHS = zeroExtendInputIfNeeded(LHS);
3221     RHS = zeroExtendInputIfNeeded(RHS);
3222     SDValue Subtract =
3223       SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3224     return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3225                                           Subtract, S->getI64Imm(63, dl)), 0);
3226   }
3227   }
3228 }
3229 
3230 /// Produces a zero-extended result of comparing two 64-bit values according to
3231 /// the passed condition code (an empty SDValue means nothing was emitted).
3232 SDValue
3233 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3234                                               ISD::CondCode CC,
3235                                               int64_t RHSValue, SDLoc dl) {
3236   if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3237       CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
3238     return SDValue(); // Not attempted under these CmpInGPR settings.
3239   bool IsRHSZero = RHSValue == 0;
3240   bool IsRHSOne = RHSValue == 1;
3241   bool IsRHSNegOne = RHSValue == -1LL;
3242   switch (CC) {
3243   default: return SDValue();
3244   case ISD::SETEQ: {
3245     // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3246     // (zext (setcc %a, 0, seteq)) ->  (lshr (ctlz %a), 6)
3247     SDValue Xor = IsRHSZero ? LHS :
3248       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3249     SDValue Clz =
3250       SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3251     return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3252                                           S->getI64Imm(58, dl),
3253                                           S->getI64Imm(63, dl)), 0);
3254   }
3255   case ISD::SETNE: {
3256     // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3257     // (zext (setcc %a, %b, setne)) -> (sube addc.reg, (xor %a, %b), addc.CA)
3258     // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3259     // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, %a, addcz.CA)
3260     SDValue Xor = IsRHSZero ? LHS :
3261       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3262     SDValue AC =
3263       SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3264                                      Xor, S->getI32Imm(~0U, dl)), 0);
3265     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3266                                           Xor, AC.getValue(1)), 0);
3267   }
3268   case ISD::SETGE: {
3269     // {subc.reg, subc.CA} = (subcarry %a, %b)
3270     // (zext (setcc %a, %b, setge)) ->
3271     //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3272     // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3273     if (IsRHSZero)
3274       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3275     std::swap(LHS, RHS); // Handle (%a >= %b) as (%b <= %a) and fall through.
3276     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3277     IsRHSZero = RHSConst && RHSConst->isNullValue();
3278     LLVM_FALLTHROUGH;
3279   }
3280   case ISD::SETLE: {
3281     // {subc.reg, subc.CA} = (subcarry %b, %a)
3282     // (zext (setcc %a, %b, setle)) ->
3283     //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3284     // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3285     if (IsRHSZero)
3286       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3287     SDValue ShiftL =
3288       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3289                                      S->getI64Imm(1, dl),
3290                                      S->getI64Imm(63, dl)), 0);
3291     SDValue ShiftR =
3292       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3293                                      S->getI64Imm(63, dl)), 0);
3294     SDValue SubtractCarry = // Value 1 is the Glue result fed to ADDE8 below.
3295       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3296                                      LHS, RHS), 1);
3297     return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3298                                           ShiftR, ShiftL, SubtractCarry), 0);
3299   }
3300   case ISD::SETGT: {
3301     // {subc.reg, subc.CA} = (subcarry %b, %a)
3302     // (zext (setcc %a, %b, setgt)) ->
3303     //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3304     // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3305     if (IsRHSNegOne)
3306       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3307     if (IsRHSZero) {
3308       SDValue Addi =
3309         SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3310                                        S->getI64Imm(~0ULL, dl)), 0);
3311       SDValue Nor =
3312         SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3313       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3314                                             S->getI64Imm(1, dl),
3315                                             S->getI64Imm(63, dl)), 0);
3316     }
3317     std::swap(LHS, RHS); // Handle (%a > %b) as (%b < %a) and fall through.
3318     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3319     IsRHSZero = RHSConst && RHSConst->isNullValue();
3320     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3321     LLVM_FALLTHROUGH;
3322   }
3323   case ISD::SETLT: {
3324     // {subc.reg, subc.CA} = (subcarry %a, %b)
3325     // (zext (setcc %a, %b, setlt)) ->
3326     //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3327     // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3328     if (IsRHSOne)
3329       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3330     if (IsRHSZero)
3331       return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3332                                             S->getI64Imm(1, dl),
3333                                             S->getI64Imm(63, dl)), 0);
3334     SDValue SRADINode =
3335       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3336                                      LHS, S->getI64Imm(63, dl)), 0);
3337     SDValue SRDINode =
3338       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3339                                      RHS, S->getI64Imm(1, dl),
3340                                      S->getI64Imm(63, dl)), 0);
3341     SDValue SUBFC8Carry =
3342       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3343                                      RHS, LHS), 1);
3344     SDValue ADDE8Node =
3345       SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3346                                      SRDINode, SRADINode, SUBFC8Carry), 0);
3347     return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3348                                           ADDE8Node, S->getI64Imm(1, dl)), 0);
3349   }
3350   case ISD::SETUGE:
3351     // {subc.reg, subc.CA} = (subcarry %a, %b)
3352     // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3353     std::swap(LHS, RHS);
3354     LLVM_FALLTHROUGH;
3355   case ISD::SETULE: {
3356     // {subc.reg, subc.CA} = (subcarry %b, %a)
3357     // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3358     SDValue SUBFC8Carry =
3359       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3360                                      LHS, RHS), 1);
3361     SDValue SUBFE8Node =
3362       SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3363                                      LHS, LHS, SUBFC8Carry), 0);
3364     return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3365                                           SUBFE8Node, S->getI64Imm(1, dl)), 0);
3366   }
3367   case ISD::SETUGT:
3368     // {subc.reg, subc.CA} = (subcarry %b, %a)
3369     // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3370     std::swap(LHS, RHS);
3371     LLVM_FALLTHROUGH;
3372   case ISD::SETULT: {
3373     // {subc.reg, subc.CA} = (subcarry %a, %b)
3374     // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3375     SDValue SubtractCarry =
3376       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3377                                      RHS, LHS), 1);
3378     SDValue ExtSub =
3379       SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3380                                      LHS, LHS, SubtractCarry), 0);
3381     return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3382                                           ExtSub), 0);
3383   }
3384   }
3385 }
3386 
3387 /// Produces a sign-extended result of comparing two 64-bit values according to
3388 /// the passed condition code (an empty SDValue means nothing was emitted).
3389 SDValue
3390 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3391                                               ISD::CondCode CC,
3392                                               int64_t RHSValue, SDLoc dl) {
3393   if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3394       CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
3395     return SDValue(); // Not attempted under these CmpInGPR settings.
3396   bool IsRHSZero = RHSValue == 0;
3397   bool IsRHSOne = RHSValue == 1;
3398   bool IsRHSNegOne = RHSValue == -1LL;
3399   switch (CC) {
3400   default: return SDValue();
3401   case ISD::SETEQ: {
3402     // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3403     // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3404     // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3405     // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3406     SDValue AddInput = IsRHSZero ? LHS :
3407       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3408     SDValue Addic =
3409       SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3410                                      AddInput, S->getI32Imm(~0U, dl)), 0);
3411     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3412                                           Addic, Addic.getValue(1)), 0);
3413   }
3414   case ISD::SETNE: {
3415     // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3416     // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3417     // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3418     // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3419     SDValue Xor = IsRHSZero ? LHS :
3420       SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3421     SDValue SC =
3422       SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3423                                      Xor, S->getI32Imm(0, dl)), 0);
3424     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3425                                           SC, SC.getValue(1)), 0);
3426   }
3427   case ISD::SETGE: {
3428     // {subc.reg, subc.CA} = (subcarry %a, %b)
3429     // (sext (setcc %a, %b, setge)) ->
3430     //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3431     // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3432     if (IsRHSZero)
3433       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3434     std::swap(LHS, RHS); // Handle (%a >= %b) as (%b <= %a) and fall through.
3435     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3436     IsRHSZero = RHSConst && RHSConst->isNullValue();
3437     LLVM_FALLTHROUGH;
3438   }
3439   case ISD::SETLE: {
3440     // {subc.reg, subc.CA} = (subcarry %b, %a)
3441     // (sext (setcc %a, %b, setle)) ->
3442     //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3443     // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3444     if (IsRHSZero)
3445       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3446     SDValue ShiftR =
3447       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3448                                      S->getI64Imm(63, dl)), 0);
3449     SDValue ShiftL =
3450       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3451                                      S->getI64Imm(1, dl),
3452                                      S->getI64Imm(63, dl)), 0);
3453     SDValue SubtractCarry = // Value 1 is the Glue result fed to ADDE8 below.
3454       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3455                                      LHS, RHS), 1);
3456     SDValue Adde =
3457       SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3458                                      ShiftR, ShiftL, SubtractCarry), 0);
3459     return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3460   }
3461   case ISD::SETGT: {
3462     // {subc.reg, subc.CA} = (subcarry %b, %a)
3463     // (sext (setcc %a, %b, setgt)) ->
3464     //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3465     // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3466     if (IsRHSNegOne)
3467       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3468     if (IsRHSZero) {
3469       SDValue Add =
3470         SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3471                                        S->getI64Imm(-1, dl)), 0);
3472       SDValue Nor =
3473         SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3474       return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3475                                             S->getI64Imm(63, dl)), 0);
3476     }
3477     std::swap(LHS, RHS); // Handle (%a > %b) as (%b < %a) and fall through.
3478     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3479     IsRHSZero = RHSConst && RHSConst->isNullValue();
3480     IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3481     LLVM_FALLTHROUGH;
3482   }
3483   case ISD::SETLT: {
3484     // {subc.reg, subc.CA} = (subcarry %a, %b)
3485     // (sext (setcc %a, %b, setlt)) ->
3486     //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3487     // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3488     if (IsRHSOne)
3489       return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3490     if (IsRHSZero) {
3491       return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3492                                             S->getI64Imm(63, dl)), 0);
3493     }
3494     SDValue SRADINode =
3495       SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3496                                      LHS, S->getI64Imm(63, dl)), 0);
3497     SDValue SRDINode =
3498       SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3499                                      RHS, S->getI64Imm(1, dl),
3500                                      S->getI64Imm(63, dl)), 0);
3501     SDValue SUBFC8Carry =
3502       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3503                                      RHS, LHS), 1);
3504     SDValue ADDE8Node =
3505       SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3506                                      SRDINode, SRADINode, SUBFC8Carry), 0);
3507     SDValue XORI8Node =
3508       SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3509                                      ADDE8Node, S->getI64Imm(1, dl)), 0);
3510     return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3511                                           XORI8Node), 0);
3512   }
3513   case ISD::SETUGE:
3514     // {subc.reg, subc.CA} = (subcarry %a, %b)
3515     // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3516     std::swap(LHS, RHS);
3517     LLVM_FALLTHROUGH;
3518   case ISD::SETULE: {
3519     // {subc.reg, subc.CA} = (subcarry %b, %a)
3520     // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3521     SDValue SubtractCarry =
3522       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3523                                      LHS, RHS), 1);
3524     SDValue ExtSub =
3525       SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3526                                      LHS, SubtractCarry), 0);
3527     return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3528                                           ExtSub, ExtSub), 0);
3529   }
3530   case ISD::SETUGT:
3531     // {subc.reg, subc.CA} = (subcarry %b, %a)
3532     // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3533     std::swap(LHS, RHS);
3534     LLVM_FALLTHROUGH;
3535   case ISD::SETULT: {
3536     // {subc.reg, subc.CA} = (subcarry %a, %b)
3537     // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3538     SDValue SubCarry =
3539       SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3540                                      RHS, LHS), 1);
3541     return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3542                                      LHS, LHS, SubCarry), 0);
3543   }
3544   }
3545 }
3546 
3547 /// Decides whether an i1 SETCC result should be produced in a GPR by looking
3548 /// at the opcodes of its users: sign/zero extends, selects and logical
3549 /// operations are acceptable. A single-use SETCC is accepted unchecked since
3550 /// the caller is expected to have looked at that one use already.
3551 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3552   assert(Compare.getOpcode() == ISD::SETCC &&
3553          "An ISD::SETCC node required here.");
3554 
3555   if (Compare.hasOneUse())
3556     return true;
3557   // Walk the users and give up at the first one that is not an extend, a
3558   // select or a logical operation.
3559   for (auto User : Compare.getNode()->uses()) {
3560     const unsigned UserOpc = User->getOpcode();
3561     const bool WantsGPR =
3562         UserOpc == ISD::SIGN_EXTEND || UserOpc == ISD::ZERO_EXTEND ||
3563         UserOpc == ISD::SELECT || isLogicOp(UserOpc);
3564     if (WantsGPR)
3565       continue;
3566     OmittedForNonExtendUses++; // Count the comparison we decline to convert.
3567     return false;
3568   }
3569   return true;
3570 }
3571 
3572 /// Builds a GPR-only equivalent of a SETCC, with a result as wide as the
3573 /// compared inputs. SELECT_CC is accepted too, for the case where one of its
3574 /// true/false values is a power of two and the other is zero.
3575 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3576                                                 SetccInGPROpts ConvOpts) {
3577   const unsigned CmpOpc = Compare.getOpcode();
3578   assert((CmpOpc == ISD::SETCC || CmpOpc == ISD::SELECT_CC) &&
3579          "An ISD::SETCC node required here.");
3580 
3581   // Leave a SETCC alone when some user needs the i1 result itself (i.e. the
3582   // result in the CR) rather than a GPR value.
3583   if (CmpOpc == ISD::SETCC && !allUsesExtend(Compare, CurDAG))
3584     return SDValue();
3585 
3586   SDValue LHS = Compare.getOperand(0);
3587   SDValue RHS = Compare.getOperand(1);
3588 
3589   // SETCC keeps its condition code in operand 2, SELECT_CC in operand 4.
3590   const int CCOpNum = (CmpOpc == ISD::SELECT_CC) ? 4 : 2;
3591   ISD::CondCode CC = cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3592 
3593   // Only i32 and i64 inputs are handled.
3594   EVT InputVT = LHS.getValueType();
3595   const bool Inputs32Bit = InputVT == MVT::i32;
3596   if (!Inputs32Bit && InputVT != MVT::i64)
3597     return SDValue();
3598 
3599   // *Invert options flip the condition code; SExt* options sign-extend.
3600   const bool Invert = ConvOpts == SetccInGPROpts::ZExtInvert ||
3601                       ConvOpts == SetccInGPROpts::SExtInvert;
3602   const bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3603                       ConvOpts == SetccInGPROpts::SExtInvert;
3604   if (Invert)
3605     CC = ISD::getSetCCInverse(CC, true);
3606 
3607   // A non-constant RHS is reported to the helpers as INT64_MAX.
3608   SDLoc dl(Compare);
3609   ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3610   int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3611   if (Inputs32Bit)
3612     return IsSext ? get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl)
3613                   : get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3614   return IsSext ? get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl)
3615                 : get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3616 }
3617 
3618 } // end anonymous namespace
3619 
3620 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3621   if (N->getValueType(0) != MVT::i32 &&
3622       N->getValueType(0) != MVT::i64)
3623     return false;
3624 
3625   // This optimization will emit code that assumes 64-bit registers
3626   // so we don't want to run it in 32-bit mode. Also don't run it
3627   // on functions that are not to be optimized.
3628   if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3629     return false;
3630 
3631   switch (N->getOpcode()) {
3632   default: break;
3633   case ISD::ZERO_EXTEND:
3634   case ISD::SIGN_EXTEND:
3635   case ISD::AND:
3636   case ISD::OR:
3637   case ISD::XOR: {
3638     IntegerCompareEliminator ICmpElim(CurDAG, this);
3639     if (SDNode *New = ICmpElim.Select(N)) {
3640       ReplaceNode(N, New);
3641       return true;
3642     }
3643   }
3644   }
3645   return false;
3646 }
3647 
3648 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3649   if (N->getValueType(0) != MVT::i32 &&
3650       N->getValueType(0) != MVT::i64)
3651     return false;
3652 
3653   if (!UseBitPermRewriter)
3654     return false;
3655 
3656   switch (N->getOpcode()) {
3657   default: break;
3658   case ISD::ROTL:
3659   case ISD::SHL:
3660   case ISD::SRL:
3661   case ISD::AND:
3662   case ISD::OR: {
3663     BitPermutationSelector BPS(CurDAG);
3664     if (SDNode *New = BPS.Select(N)) {
3665       ReplaceNode(N, New);
3666       return true;
3667     }
3668     return false;
3669   }
3670   }
3671 
3672   return false;
3673 }
3674 
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
///
/// The compare opcode is chosen from the type of \p LHS (i32, i64, f32, f64,
/// otherwise f128 is asserted) and, for integers, from whether \p CC is an
/// equality, unsigned or signed comparison. When \p RHS is a constant that
/// fits the immediate field, a compare-immediate node is returned directly;
/// otherwise a register-register compare of \p LHS and \p RHS is built.
///
/// \param LHS Left-hand operand of the comparison.
/// \param RHS Right-hand operand of the comparison.
/// \param CC  Condition code; used here only to pick the compare instruction.
/// \param dl  Debug location attached to every node that is created.
/// \returns result 0 of the compare machine node.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                  const SDLoc &dl) {
  // Opcode of the register-register compare used when no immediate form
  // applies; every path that reaches the final return assigns it.
  unsigned Opc;

  if (LHS.getValueType() == MVT::i32) {
    unsigned Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt32Immediate(RHS, Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>((int)Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // The constant fits neither 16-bit form. For comparisons other than
        // equality, the default code would materialize the constant, then
        // compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpw cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmplwi cr0,r0,0x5678
        //   beq cr0,L6
        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
                                           getI32Imm(Imm >> 16, dl)), 0);
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      }
      Opc = PPC::CMPLW;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: only an unsigned 16-bit immediate can be folded.
      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLW;
    } else {
      // Signed compare: only a signed 16-bit immediate can be folded.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                              getI32Imm((int)SImm & 0xFFFF,
                                                        dl)),
                         0);
      Opc = PPC::CMPW;
    }
  } else if (LHS.getValueType() == MVT::i64) {
    uint64_t Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt64Immediate(RHS.getNode(), Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        // NOTE(review): the two immediates below are built with getI32Imm
        // while the other i64 paths use getI64Imm - confirm this is intended.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // The constant fits neither 16-bit form. For comparisons other than
        // equality, the default code would materialize the constant, then
        // compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpd cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmpldi cr0,r0,0x5678
        //   beq cr0,L6
        // This is only done when the constant is an unsigned 32-bit value;
        // wider constants fall through to the register-register compare.
        if (isUInt<32>(Imm)) {
          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
                                             getI64Imm(Imm >> 16, dl)), 0);
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
                                                getI64Imm(Imm & 0xFFFF, dl)),
                         0);
        }
      }
      Opc = PPC::CMPLD;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: only an unsigned 16-bit immediate can be folded.
      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLD;
    } else {
      // Signed compare: only a signed 16-bit immediate can be folded.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                              getI64Imm(SImm & 0xFFFF, dl)),
                         0);
      Opc = PPC::CMPD;
    }
  } else if (LHS.getValueType() == MVT::f32) {
    if (PPCSubTarget->hasSPE()) {
      // With SPE the compare instruction itself depends on the condition code.
      // Any condition not listed below is treated like an equality compare.
      switch (CC) {
        default:
        case ISD::SETEQ:
        case ISD::SETNE:
          Opc = PPC::EFSCMPEQ;
          break;
        case ISD::SETLT:
        case ISD::SETGE:
        case ISD::SETOLT:
        case ISD::SETOGE:
        case ISD::SETULT:
        case ISD::SETUGE:
          Opc = PPC::EFSCMPLT;
          break;
        case ISD::SETGT:
        case ISD::SETLE:
        case ISD::SETOGT:
        case ISD::SETOLE:
        case ISD::SETUGT:
        case ISD::SETULE:
          Opc = PPC::EFSCMPGT;
          break;
      }
    } else
      Opc = PPC::FCMPUS;
  } else if (LHS.getValueType() == MVT::f64) {
    if (PPCSubTarget->hasSPE()) {
      // Same scheme as the f32 case, using the double-precision SPE compares.
      switch (CC) {
        default:
        case ISD::SETEQ:
        case ISD::SETNE:
          Opc = PPC::EFDCMPEQ;
          break;
        case ISD::SETLT:
        case ISD::SETGE:
        case ISD::SETOLT:
        case ISD::SETOGE:
        case ISD::SETULT:
        case ISD::SETUGE:
          Opc = PPC::EFDCMPLT;
          break;
        case ISD::SETGT:
        case ISD::SETLE:
        case ISD::SETOGT:
        case ISD::SETOLE:
        case ISD::SETUGT:
        case ISD::SETULE:
          Opc = PPC::EFDCMPGT;
          break;
      }
    } else
      Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  } else {
    // The only remaining supported type is f128, which needs VSX.
    assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
    assert(PPCSubTarget->hasVSX() && "__float128 requires VSX");
    Opc = PPC::XSCMPUQP;
  }
  // No immediate form applied: compare the two operands directly.
  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
3833 
3834 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
3835   switch (CC) {
3836   case ISD::SETUEQ:
3837   case ISD::SETONE:
3838   case ISD::SETOLE:
3839   case ISD::SETOGE:
3840     llvm_unreachable("Should be lowered by legalize!");
3841   default: llvm_unreachable("Unknown condition!");
3842   case ISD::SETOEQ:
3843   case ISD::SETEQ:  return PPC::PRED_EQ;
3844   case ISD::SETUNE:
3845   case ISD::SETNE:  return PPC::PRED_NE;
3846   case ISD::SETOLT:
3847   case ISD::SETLT:  return PPC::PRED_LT;
3848   case ISD::SETULE:
3849   case ISD::SETLE:  return PPC::PRED_LE;
3850   case ISD::SETOGT:
3851   case ISD::SETGT:  return PPC::PRED_GT;
3852   case ISD::SETUGE:
3853   case ISD::SETGE:  return PPC::PRED_GE;
3854   case ISD::SETO:   return PPC::PRED_NU;
3855   case ISD::SETUO:  return PPC::PRED_UN;
3856     // These two are invalid for floating point.  Assume we have int.
3857   case ISD::SETULT: return PPC::PRED_LT;
3858   case ISD::SETUGT: return PPC::PRED_GT;
3859   }
3860 }
3861 
3862 /// getCRIdxForSetCC - Return the index of the condition register field
3863 /// associated with the SetCC condition, and whether or not the field is
3864 /// treated as inverted.  That is, lt = 0; ge = 0 inverted.
3865 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
3866   Invert = false;
3867   switch (CC) {
3868   default: llvm_unreachable("Unknown condition!");
3869   case ISD::SETOLT:
3870   case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
3871   case ISD::SETOGT:
3872   case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
3873   case ISD::SETOEQ:
3874   case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
3875   case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
3876   case ISD::SETUGE:
3877   case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
3878   case ISD::SETULE:
3879   case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
3880   case ISD::SETUNE:
3881   case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
3882   case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
3883   case ISD::SETUEQ:
3884   case ISD::SETOGE:
3885   case ISD::SETOLE:
3886   case ISD::SETONE:
3887     llvm_unreachable("Invalid branch code: should be expanded by legalize");
3888   // These are invalid for floating point.  Assume integer.
3889   case ISD::SETULT: return 0;
3890   case ISD::SETUGT: return 1;
3891   }
3892 }
3893 
3894 // getVCmpInst: return the vector compare instruction for the specified
3895 // vector type and condition code. Since this is for altivec specific code,
3896 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
3897 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
3898                                 bool HasVSX, bool &Swap, bool &Negate) {
3899   Swap = false;
3900   Negate = false;
3901 
3902   if (VecVT.isFloatingPoint()) {
3903     /* Handle some cases by swapping input operands.  */
3904     switch (CC) {
3905       case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
3906       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3907       case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
3908       case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
3909       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3910       case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
3911       default: break;
3912     }
3913     /* Handle some cases by negating the result.  */
3914     switch (CC) {
3915       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3916       case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
3917       case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
3918       case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
3919       default: break;
3920     }
3921     /* We have instructions implementing the remaining cases.  */
3922     switch (CC) {
3923       case ISD::SETEQ:
3924       case ISD::SETOEQ:
3925         if (VecVT == MVT::v4f32)
3926           return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
3927         else if (VecVT == MVT::v2f64)
3928           return PPC::XVCMPEQDP;
3929         break;
3930       case ISD::SETGT:
3931       case ISD::SETOGT:
3932         if (VecVT == MVT::v4f32)
3933           return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
3934         else if (VecVT == MVT::v2f64)
3935           return PPC::XVCMPGTDP;
3936         break;
3937       case ISD::SETGE:
3938       case ISD::SETOGE:
3939         if (VecVT == MVT::v4f32)
3940           return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
3941         else if (VecVT == MVT::v2f64)
3942           return PPC::XVCMPGEDP;
3943         break;
3944       default:
3945         break;
3946     }
3947     llvm_unreachable("Invalid floating-point vector compare condition");
3948   } else {
3949     /* Handle some cases by swapping input operands.  */
3950     switch (CC) {
3951       case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
3952       case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
3953       case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
3954       case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
3955       default: break;
3956     }
3957     /* Handle some cases by negating the result.  */
3958     switch (CC) {
3959       case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
3960       case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
3961       case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
3962       case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
3963       default: break;
3964     }
3965     /* We have instructions implementing the remaining cases.  */
3966     switch (CC) {
3967       case ISD::SETEQ:
3968       case ISD::SETUEQ:
3969         if (VecVT == MVT::v16i8)
3970           return PPC::VCMPEQUB;
3971         else if (VecVT == MVT::v8i16)
3972           return PPC::VCMPEQUH;
3973         else if (VecVT == MVT::v4i32)
3974           return PPC::VCMPEQUW;
3975         else if (VecVT == MVT::v2i64)
3976           return PPC::VCMPEQUD;
3977         break;
3978       case ISD::SETGT:
3979         if (VecVT == MVT::v16i8)
3980           return PPC::VCMPGTSB;
3981         else if (VecVT == MVT::v8i16)
3982           return PPC::VCMPGTSH;
3983         else if (VecVT == MVT::v4i32)
3984           return PPC::VCMPGTSW;
3985         else if (VecVT == MVT::v2i64)
3986           return PPC::VCMPGTSD;
3987         break;
3988       case ISD::SETUGT:
3989         if (VecVT == MVT::v16i8)
3990           return PPC::VCMPGTUB;
3991         else if (VecVT == MVT::v8i16)
3992           return PPC::VCMPGTUH;
3993         else if (VecVT == MVT::v4i32)
3994           return PPC::VCMPGTUW;
3995         else if (VecVT == MVT::v2i64)
3996           return PPC::VCMPGTUD;
3997         break;
3998       default:
3999         break;
4000     }
4001     llvm_unreachable("Invalid integer vector compare condition");
4002   }
4003 }
4004 
/// Try to select the ISD::SETCC node \p N by hand.
///
/// Three strategies are attempted in order:
///  1. When CR bits are not in use and the RHS is the i32 constant 0 or -1,
///     emit a short GPR-only sequence for some condition codes.
///  2. For vector operands, emit a vector compare (operands possibly swapped,
///     result possibly complemented).
///  3. Otherwise compare into CR7, move the field into a GPR and extract the
///     relevant bit, inverting it when required.
///
/// \returns true if \p N was morphed into a machine node; false when the node
///          is left untouched (vector operands on QPX/SPE subtargets, or any
///          remaining scalar compare while CR bits are in use).
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);

  if (!PPCSubTarget->useCRBits() &&
      isInt32Immediate(N->getOperand(1), Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // Count leading zeros, then rotate left by 27 and keep bits 5..31,
        // i.e. shift the count right by 5 bits.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        // The carry-based sequence is only used in 32-bit mode.
        if (isPPC64) break;
        // ADDIC also produces a glue result, which is consumed by the SUBFE.
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // Rotate left by 1 and keep only bit 31: extracts the sign bit of Op.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // Extract the sign bit of ANDC(NEG(Op), Op).
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        // The carry-based sequence is only used in 32-bit mode.
        if (isPPC64) break;
        // ADDIC Op, 1 produces a glue result that feeds ADDZE of a zero
        // materialized with LI.
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        // The carry-based sequence is only used in 32-bit mode.
        if (isPPC64) break;
        // Complement Op (NOR with itself), then reuse the "setne 0" sequence.
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // Extract the sign bit of (Op + 1) & Op.
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // Extract the sign bit of Op and flip it with XORI 1.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (LHS.getValueType().isVector()) {
    // Vector compares are not selected here for QPX or SPE subtargets.
    if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
                                        PPCSubTarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    // The result is the integer vector with the same element count and width.
    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Complement the compare result by NOR-ing it with itself.
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  // The CR-to-GPR sequence below is not used when CR bits are in use.
  if (PPCSubTarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
  SDValue IntCR;

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
  // The correct compare instruction is already set by SelectCC()
  if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
    Idx = 1;
  }

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
  // Result 1 of the copy is the glue that ties MFOCRF to it.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  // Rotate so that the selected CR7 bit lands in bit 31 and mask everything
  // else away (MB = ME = 31).
  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                      getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  // The condition is the complement of the CR bit, so flip the extracted bit.
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}
4162 
4163 /// Does this node represent a load/store node whose address can be represented
4164 /// with a register plus an immediate that's a multiple of \p Val:
4165 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4166   LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4167   StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4168   SDValue AddrOp;
4169   if (LDN)
4170     AddrOp = LDN->getOperand(1);
4171   else if (STN)
4172     AddrOp = STN->getOperand(2);
4173 
4174   // If the address points a frame object or a frame object with an offset,
4175   // we need to check the object alignment.
4176   short Imm = 0;
4177   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4178           AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4179                                            AddrOp)) {
4180     // If op0 is a frame index that is under aligned, we can't do it either,
4181     // because it is translated to r31 or r1 + slot + offset. We won't know the
4182     // slot number until the stack frame is finalized.
4183     const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4184     unsigned SlotAlign = MFI.getObjectAlignment(FI->getIndex());
4185     if ((SlotAlign % Val) != 0)
4186       return false;
4187 
4188     // If we have an offset, we need further check on the offset.
4189     if (AddrOp.getOpcode() != ISD::ADD)
4190       return true;
4191   }
4192 
4193   if (AddrOp.getOpcode() == ISD::ADD)
4194     return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4195 
4196   // If the address comes from the outside, the offset will be zero.
4197   return AddrOp.getOpcode() == ISD::CopyFromReg;
4198 }
4199 
4200 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4201   // Transfer memoperands.
4202   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4203   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4204 }
4205 
/// Check whether the SELECT_CC node \p N computes a three-way comparison
/// result (-1, 0 or 1) of its two compared operands in one of the shapes
/// listed in the body, so that it can be lowered to a SETB.
///
/// \param N           The ISD::SELECT_CC node to examine (asserted).
/// \param CC          The condition code of \p N.
/// \param DAG         Not used by this function.
/// \param NeedSwapOps Set, on a successful match, to whether the compare
///                    operands must be exchanged.
/// \param IsUnCmp     Set to true when the match involves an unsigned
///                    comparison. It is never cleared here, and it may already
///                    have been set when the function returns false;
///                    presumably callers initialize both flags - confirm.
/// \returns true if \p N matches one of the supported patterns.
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
                         bool &NeedSwapOps, bool &IsUnCmp) {

  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue TrueRes = N->getOperand(2);
  SDValue FalseRes = N->getOperand(3);
  // The value selected when the outer condition holds must be a constant.
  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
  if (!TrueConst)
    return false;

  assert((N->getSimpleValueType(0) == MVT::i64 ||
          N->getSimpleValueType(0) == MVT::i32) &&
         "Expecting either i64 or i32 here.");

  // We are looking for any of:
  // (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs,  1, -1, cc2), seteq)
  // (select_cc lhs, rhs,  0, (select_cc [lr]hs, [lr]hs, -1,  1, cc2), seteq)
  int64_t TrueResVal = TrueConst->getSExtValue();
  if ((TrueResVal < -1 || TrueResVal > 1) ||
      (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
      (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
      (TrueResVal == 0 &&
       (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
    return false;

  // The inner comparison is either the false value itself (a nested
  // select_cc) or the operand of the extension that forms the false value.
  bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC;
  SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0);
  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
      SetOrSelCC.getOpcode() != ISD::SELECT_CC)
    return false;

  // Without this setb optimization, the outer SELECT_CC will be manually
  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
  // transforms pseudo instruction to isel instruction. When there are more than
  // one use for result like zext/sext, with current optimization we only see
  // isel is replaced by setb but can't see any significant gain. Since
  // setb has longer latency than original isel, we should avoid this. Another
  // point is that setb requires comparison always kept, it can break the
  // opportunity to get the comparison away if we have in future.
  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
    return false;

  SDValue InnerLHS = SetOrSelCC.getOperand(0);
  SDValue InnerRHS = SetOrSelCC.getOperand(1);
  // The condition code is operand 4 of a select_cc and operand 2 of a setcc.
  ISD::CondCode InnerCC =
      cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
  // If the inner comparison is a select_cc, make sure the true/false values are
  // 1/-1 and canonicalize it if needed.
  if (InnerIsSel) {
    ConstantSDNode *SelCCTrueConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
    ConstantSDNode *SelCCFalseConst =
        dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
    if (!SelCCTrueConst || !SelCCFalseConst)
      return false;
    int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
    int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
    // The values must be -1/1 (requiring a swap) or 1/-1.
    if (SelCCTVal == -1 && SelCCFVal == 1) {
      std::swap(InnerLHS, InnerRHS);
    } else if (SelCCTVal != 1 || SelCCFVal != -1)
      return false;
  }

  // Canonicalize unsigned case
  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
    IsUnCmp = true;
    InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
  }

  // The inner comparison must use the same two operands as the outer one,
  // either in the same order or exchanged.
  bool InnerSwapped = false;
  if (LHS == InnerRHS && RHS == InnerLHS)
    InnerSwapped = true;
  else if (LHS != InnerLHS || RHS != InnerRHS)
    return false;

  switch (CC) {
  // (select_cc lhs, rhs,  0,
  //     (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
  case ISD::SETEQ:
    if (!InnerIsSel)
      return false;
    if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
      return false;
    NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
    break;

  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
  case ISD::SETULT:
    // An unsigned outer compare needs an unsigned inner compare, unless the
    // inner compare is setne.
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETLT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
        (InnerCC == ISD::SETLT && InnerSwapped))
      NeedSwapOps = (TrueResVal == 1);
    else
      return false;
    break;

  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
  case ISD::SETUGT:
    // An unsigned outer compare needs an unsigned inner compare, unless the
    // inner compare is setne.
    if (!IsUnCmp && InnerCC != ISD::SETNE)
      return false;
    IsUnCmp = true;
    LLVM_FALLTHROUGH;
  case ISD::SETGT:
    if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
        (InnerCC == ISD::SETGT && InnerSwapped))
      NeedSwapOps = (TrueResVal == -1);
    else
      return false;
    break;

  default:
    return false;
  }

  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
  LLVM_DEBUG(N->dump());

  return true;
}
4345 
4346 // Select - Convert the specified operand from a target-independent to a
4347 // target-specific node if it hasn't already been changed.
4348 void PPCDAGToDAGISel::Select(SDNode *N) {
4349   SDLoc dl(N);
4350   if (N->isMachineOpcode()) {
4351     N->setNodeId(-1);
4352     return;   // Already selected.
4353   }
4354 
4355   // In case any misguided DAG-level optimizations form an ADD with a
4356   // TargetConstant operand, crash here instead of miscompiling (by selecting
4357   // an r+r add instead of some kind of r+i add).
4358   if (N->getOpcode() == ISD::ADD &&
4359       N->getOperand(1).getOpcode() == ISD::TargetConstant)
4360     llvm_unreachable("Invalid ADD with TargetConstant operand");
4361 
4362   // Try matching complex bit permutations before doing anything else.
4363   if (tryBitPermutation(N))
4364     return;
4365 
4366   // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4367   if (tryIntCompareInGPR(N))
4368     return;
4369 
4370   switch (N->getOpcode()) {
4371   default: break;
4372 
4373   case ISD::Constant:
4374     if (N->getValueType(0) == MVT::i64) {
4375       ReplaceNode(N, selectI64Imm(CurDAG, N));
4376       return;
4377     }
4378     break;
4379 
4380   case ISD::SETCC:
4381     if (trySETCC(N))
4382       return;
4383     break;
4384   // These nodes will be transformed into GETtlsADDR32 node, which
4385   // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
4386   case PPCISD::ADDI_TLSLD_L_ADDR:
4387   case PPCISD::ADDI_TLSGD_L_ADDR: {
4388     const Module *Mod = MF->getFunction().getParent();
4389     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4390         !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
4391         Mod->getPICLevel() == PICLevel::SmallPIC)
4392       break;
4393     // Attach global base pointer on GETtlsADDR32 node in order to
4394     // generate secure plt code for TLS symbols.
4395     getGlobalBaseReg();
4396   } break;
4397   case PPCISD::CALL: {
4398     const Module *M = MF->getFunction().getParent();
4399 
4400     if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
4401         (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) ||
4402         !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC)
4403       break;
4404 
4405     SDValue Op = N->getOperand(1);
4406 
4407     if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4408       if (GA->getTargetFlags() == PPCII::MO_PLT)
4409         getGlobalBaseReg();
4410     }
4411     else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
4412       if (ES->getTargetFlags() == PPCII::MO_PLT)
4413         getGlobalBaseReg();
4414     }
4415   }
4416     break;
4417 
4418   case PPCISD::GlobalBaseReg:
4419     ReplaceNode(N, getGlobalBaseReg());
4420     return;
4421 
4422   case ISD::FrameIndex:
4423     selectFrameIndex(N, N);
4424     return;
4425 
4426   case PPCISD::MFOCRF: {
4427     SDValue InFlag = N->getOperand(1);
4428     ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
4429                                           N->getOperand(0), InFlag));
4430     return;
4431   }
4432 
4433   case PPCISD::READ_TIME_BASE:
4434     ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
4435                                           MVT::Other, N->getOperand(0)));
4436     return;
4437 
4438   case PPCISD::SRA_ADDZE: {
4439     SDValue N0 = N->getOperand(0);
4440     SDValue ShiftAmt =
4441       CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
4442                                   getConstantIntValue(), dl,
4443                                   N->getValueType(0));
4444     if (N->getValueType(0) == MVT::i64) {
4445       SDNode *Op =
4446         CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
4447                                N0, ShiftAmt);
4448       CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
4449                            SDValue(Op, 1));
4450       return;
4451     } else {
4452       assert(N->getValueType(0) == MVT::i32 &&
4453              "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
4454       SDNode *Op =
4455         CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
4456                                N0, ShiftAmt);
4457       CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
4458                            SDValue(Op, 1));
4459       return;
4460     }
4461   }
4462 
4463   case ISD::STORE: {
4464     // Change TLS initial-exec D-form stores to X-form stores.
4465     StoreSDNode *ST = cast<StoreSDNode>(N);
4466     if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() &&
4467         ST->getAddressingMode() != ISD::PRE_INC)
4468       if (tryTLSXFormStore(ST))
4469         return;
4470     break;
4471   }
4472   case ISD::LOAD: {
4473     // Handle preincrement loads.
4474     LoadSDNode *LD = cast<LoadSDNode>(N);
4475     EVT LoadedVT = LD->getMemoryVT();
4476 
4477     // Normal loads are handled by code generated from the .td file.
4478     if (LD->getAddressingMode() != ISD::PRE_INC) {
4479       // Change TLS initial-exec D-form loads to X-form loads.
4480       if (EnableTLSOpt && PPCSubTarget->isELFv2ABI())
4481         if (tryTLSXFormLoad(LD))
4482           return;
4483       break;
4484     }
4485 
4486     SDValue Offset = LD->getOffset();
4487     if (Offset.getOpcode() == ISD::TargetConstant ||
4488         Offset.getOpcode() == ISD::TargetGlobalAddress) {
4489 
4490       unsigned Opcode;
4491       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4492       if (LD->getValueType(0) != MVT::i64) {
4493         // Handle PPC32 integer and normal FP loads.
4494         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4495         switch (LoadedVT.getSimpleVT().SimpleTy) {
4496           default: llvm_unreachable("Invalid PPC load type!");
4497           case MVT::f64: Opcode = PPC::LFDU; break;
4498           case MVT::f32: Opcode = PPC::LFSU; break;
4499           case MVT::i32: Opcode = PPC::LWZU; break;
4500           case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
4501           case MVT::i1:
4502           case MVT::i8:  Opcode = PPC::LBZU; break;
4503         }
4504       } else {
4505         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4506         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4507         switch (LoadedVT.getSimpleVT().SimpleTy) {
4508           default: llvm_unreachable("Invalid PPC load type!");
4509           case MVT::i64: Opcode = PPC::LDU; break;
4510           case MVT::i32: Opcode = PPC::LWZU8; break;
4511           case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
4512           case MVT::i1:
4513           case MVT::i8:  Opcode = PPC::LBZU8; break;
4514         }
4515       }
4516 
4517       SDValue Chain = LD->getChain();
4518       SDValue Base = LD->getBasePtr();
4519       SDValue Ops[] = { Offset, Base, Chain };
4520       SDNode *MN = CurDAG->getMachineNode(
4521           Opcode, dl, LD->getValueType(0),
4522           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4523       transferMemOperands(N, MN);
4524       ReplaceNode(N, MN);
4525       return;
4526     } else {
4527       unsigned Opcode;
4528       bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
4529       if (LD->getValueType(0) != MVT::i64) {
4530         // Handle PPC32 integer and normal FP loads.
4531         assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
4532         switch (LoadedVT.getSimpleVT().SimpleTy) {
4533           default: llvm_unreachable("Invalid PPC load type!");
4534           case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX
4535           case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX
4536           case MVT::f64: Opcode = PPC::LFDUX; break;
4537           case MVT::f32: Opcode = PPC::LFSUX; break;
4538           case MVT::i32: Opcode = PPC::LWZUX; break;
4539           case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
4540           case MVT::i1:
4541           case MVT::i8:  Opcode = PPC::LBZUX; break;
4542         }
4543       } else {
4544         assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
4545         assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
4546                "Invalid sext update load");
4547         switch (LoadedVT.getSimpleVT().SimpleTy) {
4548           default: llvm_unreachable("Invalid PPC load type!");
4549           case MVT::i64: Opcode = PPC::LDUX; break;
4550           case MVT::i32: Opcode = isSExt ? PPC::LWAUX  : PPC::LWZUX8; break;
4551           case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
4552           case MVT::i1:
4553           case MVT::i8:  Opcode = PPC::LBZUX8; break;
4554         }
4555       }
4556 
4557       SDValue Chain = LD->getChain();
4558       SDValue Base = LD->getBasePtr();
4559       SDValue Ops[] = { Base, Offset, Chain };
4560       SDNode *MN = CurDAG->getMachineNode(
4561           Opcode, dl, LD->getValueType(0),
4562           PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
4563       transferMemOperands(N, MN);
4564       ReplaceNode(N, MN);
4565       return;
4566     }
4567   }
4568 
4569   case ISD::AND: {
4570     unsigned Imm, Imm2, SH, MB, ME;
4571     uint64_t Imm64;
4572 
4573     // If this is an and of a value rotated between 0 and 31 bits and then and'd
4574     // with a mask, emit rlwinm
4575     if (isInt32Immediate(N->getOperand(1), Imm) &&
4576         isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
4577       SDValue Val = N->getOperand(0).getOperand(0);
4578       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4579                         getI32Imm(ME, dl) };
4580       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4581       return;
4582     }
4583     // If this is just a masked value where the input is not handled above, and
4584     // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4585     if (isInt32Immediate(N->getOperand(1), Imm) &&
4586         isRunOfOnes(Imm, MB, ME) &&
4587         N->getOperand(0).getOpcode() != ISD::ROTL) {
4588       SDValue Val = N->getOperand(0);
4589       SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4590                         getI32Imm(ME, dl) };
4591       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4592       return;
4593     }
4594     // If this is a 64-bit zero-extension mask, emit rldicl.
4595     if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4596         isMask_64(Imm64)) {
4597       SDValue Val = N->getOperand(0);
4598       MB = 64 - countTrailingOnes(Imm64);
4599       SH = 0;
4600 
4601       if (Val.getOpcode() == ISD::ANY_EXTEND) {
4602         auto Op0 = Val.getOperand(0);
4603         if ( Op0.getOpcode() == ISD::SRL &&
4604            isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
4605 
4606            auto ResultType = Val.getNode()->getValueType(0);
4607            auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
4608                                                ResultType);
4609            SDValue IDVal (ImDef, 0);
4610 
4611            Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
4612                          ResultType, IDVal, Op0.getOperand(0),
4613                          getI32Imm(1, dl)), 0);
4614            SH = 64 - Imm;
4615         }
4616       }
4617 
4618       // If the operand is a logical right shift, we can fold it into this
4619       // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
4620       // for n <= mb. The right shift is really a left rotate followed by a
4621       // mask, and this mask is a more-restrictive sub-mask of the mask implied
4622       // by the shift.
4623       if (Val.getOpcode() == ISD::SRL &&
4624           isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
4625         assert(Imm < 64 && "Illegal shift amount");
4626         Val = Val.getOperand(0);
4627         SH = 64 - Imm;
4628       }
4629 
4630       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4631       CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
4632       return;
4633     }
4634     // If this is a negated 64-bit zero-extension mask,
4635     // i.e. the immediate is a sequence of ones from the most significant side
4636     // and all zeros for the remainder, we should use rldicr.
4637     if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
4638         isMask_64(~Imm64)) {
4639       SDValue Val = N->getOperand(0);
4640       MB = 63 - countTrailingOnes(~Imm64);
4641       SH = 0;
4642       SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) };
4643       CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4644       return;
4645     }
4646 
4647     // AND X, 0 -> 0, not "rlwinm 32".
4648     if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
4649       ReplaceUses(SDValue(N, 0), N->getOperand(1));
4650       return;
4651     }
4652     // ISD::OR doesn't get all the bitfield insertion fun.
4653     // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
4654     // bitfield insert.
4655     if (isInt32Immediate(N->getOperand(1), Imm) &&
4656         N->getOperand(0).getOpcode() == ISD::OR &&
4657         isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
4658       // The idea here is to check whether this is equivalent to:
4659       //   (c1 & m) | (x & ~m)
4660       // where m is a run-of-ones mask. The logic here is that, for each bit in
4661       // c1 and c2:
4662       //  - if both are 1, then the output will be 1.
4663       //  - if both are 0, then the output will be 0.
4664       //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
4665       //    come from x.
4666       //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
4667       //    be 0.
4668       //  If that last condition is never the case, then we can form m from the
4669       //  bits that are the same between c1 and c2.
4670       unsigned MB, ME;
4671       if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) {
4672         SDValue Ops[] = { N->getOperand(0).getOperand(0),
4673                             N->getOperand(0).getOperand(1),
4674                             getI32Imm(0, dl), getI32Imm(MB, dl),
4675                             getI32Imm(ME, dl) };
4676         ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
4677         return;
4678       }
4679     }
4680 
4681     // Other cases are autogenerated.
4682     break;
4683   }
4684   case ISD::OR: {
4685     if (N->getValueType(0) == MVT::i32)
4686       if (tryBitfieldInsert(N))
4687         return;
4688 
4689     int16_t Imm;
4690     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4691         isIntS16Immediate(N->getOperand(1), Imm)) {
4692       KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
4693 
4694       // If this is equivalent to an add, then we can fold it with the
4695       // FrameIndex calculation.
4696       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4697         selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4698         return;
4699       }
4700     }
4701 
4702     // OR with a 32-bit immediate can be handled by ori + oris
4703     // without creating an immediate in a GPR.
4704     uint64_t Imm64 = 0;
4705     bool IsPPC64 = PPCSubTarget->isPPC64();
4706     if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4707         (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4708       // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4709       uint64_t ImmHi = Imm64 >> 16;
4710       uint64_t ImmLo = Imm64 & 0xFFFF;
4711       if (ImmHi != 0 && ImmLo != 0) {
4712         SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4713                                             N->getOperand(0),
4714                                             getI16Imm(ImmLo, dl));
4715         SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4716         CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4717         return;
4718       }
4719     }
4720 
4721     // Other cases are autogenerated.
4722     break;
4723   }
4724   case ISD::XOR: {
4725     // XOR with a 32-bit immediate can be handled by xori + xoris
4726     // without creating an immediate in a GPR.
4727     uint64_t Imm64 = 0;
4728     bool IsPPC64 = PPCSubTarget->isPPC64();
4729     if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4730         (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4731       // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4732       uint64_t ImmHi = Imm64 >> 16;
4733       uint64_t ImmLo = Imm64 & 0xFFFF;
4734       if (ImmHi != 0 && ImmLo != 0) {
4735         SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4736                                             N->getOperand(0),
4737                                             getI16Imm(ImmLo, dl));
4738         SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4739         CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4740         return;
4741       }
4742     }
4743 
4744     break;
4745   }
4746   case ISD::ADD: {
4747     int16_t Imm;
4748     if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4749         isIntS16Immediate(N->getOperand(1), Imm)) {
4750       selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4751       return;
4752     }
4753 
4754     break;
4755   }
4756   case ISD::SHL: {
4757     unsigned Imm, SH, MB, ME;
4758     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4759         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4760       SDValue Ops[] = { N->getOperand(0).getOperand(0),
4761                           getI32Imm(SH, dl), getI32Imm(MB, dl),
4762                           getI32Imm(ME, dl) };
4763       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4764       return;
4765     }
4766 
4767     // Other cases are autogenerated.
4768     break;
4769   }
4770   case ISD::SRL: {
4771     unsigned Imm, SH, MB, ME;
4772     if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4773         isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4774       SDValue Ops[] = { N->getOperand(0).getOperand(0),
4775                           getI32Imm(SH, dl), getI32Imm(MB, dl),
4776                           getI32Imm(ME, dl) };
4777       CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4778       return;
4779     }
4780 
4781     // Other cases are autogenerated.
4782     break;
4783   }
4784   // FIXME: Remove this once the ANDI glue bug is fixed:
4785   case PPCISD::ANDIo_1_EQ_BIT:
4786   case PPCISD::ANDIo_1_GT_BIT: {
4787     if (!ANDIGlueBug)
4788       break;
4789 
4790     EVT InVT = N->getOperand(0).getValueType();
4791     assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4792            "Invalid input type for ANDIo_1_EQ_BIT");
4793 
4794     unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
4795     SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4796                                         N->getOperand(0),
4797                                         CurDAG->getTargetConstant(1, dl, InVT)),
4798                  0);
4799     SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4800     SDValue SRIdxVal =
4801       CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
4802                                 PPC::sub_eq : PPC::sub_gt, dl, MVT::i32);
4803 
4804     CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4805                          SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4806     return;
4807   }
4808   case ISD::SELECT_CC: {
4809     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4810     EVT PtrVT =
4811         CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4812     bool isPPC64 = (PtrVT == MVT::i64);
4813 
4814     // If this is a select of i1 operands, we'll pattern match it.
4815     if (PPCSubTarget->useCRBits() &&
4816         N->getOperand(0).getValueType() == MVT::i1)
4817       break;
4818 
4819     if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
4820       bool NeedSwapOps = false;
4821       bool IsUnCmp = false;
4822       if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
4823         SDValue LHS = N->getOperand(0);
4824         SDValue RHS = N->getOperand(1);
4825         if (NeedSwapOps)
4826           std::swap(LHS, RHS);
4827 
4828         // Use SelectCC to generate the comparison that sets the CR bits. For
4829         // equality comparisons with one literal operand, SelectCC may avoid
4830         // materializing the whole literal and check part of it with xoris
4831         // first, in which case the resulting comparison no longer exactly
4832         // represents the GT/LT relationship. To avoid this, we specify
4833         // SETGT/SETUGT here instead of SETEQ.
4834         SDValue GenCC =
4835             SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
4836         CurDAG->SelectNodeTo(
4837             N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
4838             N->getValueType(0), GenCC);
4839         NumP9Setb++;
4840         return;
4841       }
4842     }
4843 
4844     // Handle the setcc cases here.  select_cc lhs, 0, 1, 0, cc
4845     if (!isPPC64)
4846       if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
4847         if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
4848           if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
4849             if (N1C->isNullValue() && N3C->isNullValue() &&
4850                 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
4851                 // FIXME: Implement this optzn for PPC64.
4852                 N->getValueType(0) == MVT::i32) {
4853               SDNode *Tmp =
4854                 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4855                                        N->getOperand(0), getI32Imm(~0U, dl));
4856               CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
4857                                    N->getOperand(0), SDValue(Tmp, 1));
4858               return;
4859             }
4860 
4861     SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
4862 
4863     if (N->getValueType(0) == MVT::i1) {
4864       // An i1 select is: (c & t) | (!c & f).
4865       bool Inv;
4866       unsigned Idx = getCRIdxForSetCC(CC, Inv);
4867 
4868       unsigned SRI;
4869       switch (Idx) {
4870       default: llvm_unreachable("Invalid CC index");
4871       case 0: SRI = PPC::sub_lt; break;
4872       case 1: SRI = PPC::sub_gt; break;
4873       case 2: SRI = PPC::sub_eq; break;
4874       case 3: SRI = PPC::sub_un; break;
4875       }
4876 
4877       SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
4878 
4879       SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
4880                                               CCBit, CCBit), 0);
4881       SDValue C =    Inv ? NotCCBit : CCBit,
4882               NotC = Inv ? CCBit    : NotCCBit;
4883 
4884       SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4885                                            C, N->getOperand(2)), 0);
4886       SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
4887                                               NotC, N->getOperand(3)), 0);
4888 
4889       CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
4890       return;
4891     }
4892 
4893     unsigned BROpc = getPredicateForSetCC(CC);
4894 
4895     unsigned SelectCCOp;
4896     if (N->getValueType(0) == MVT::i32)
4897       SelectCCOp = PPC::SELECT_CC_I4;
4898     else if (N->getValueType(0) == MVT::i64)
4899       SelectCCOp = PPC::SELECT_CC_I8;
4900     else if (N->getValueType(0) == MVT::f32) {
4901       if (PPCSubTarget->hasP8Vector())
4902         SelectCCOp = PPC::SELECT_CC_VSSRC;
4903       else if (PPCSubTarget->hasSPE())
4904         SelectCCOp = PPC::SELECT_CC_SPE4;
4905       else
4906         SelectCCOp = PPC::SELECT_CC_F4;
4907     } else if (N->getValueType(0) == MVT::f64) {
4908       if (PPCSubTarget->hasVSX())
4909         SelectCCOp = PPC::SELECT_CC_VSFRC;
4910       else if (PPCSubTarget->hasSPE())
4911         SelectCCOp = PPC::SELECT_CC_SPE;
4912       else
4913         SelectCCOp = PPC::SELECT_CC_F8;
4914     } else if (N->getValueType(0) == MVT::f128)
4915       SelectCCOp = PPC::SELECT_CC_F16;
4916     else if (PPCSubTarget->hasSPE())
4917       SelectCCOp = PPC::SELECT_CC_SPE;
4918     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64)
4919       SelectCCOp = PPC::SELECT_CC_QFRC;
4920     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32)
4921       SelectCCOp = PPC::SELECT_CC_QSRC;
4922     else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1)
4923       SelectCCOp = PPC::SELECT_CC_QBRC;
4924     else if (N->getValueType(0) == MVT::v2f64 ||
4925              N->getValueType(0) == MVT::v2i64)
4926       SelectCCOp = PPC::SELECT_CC_VSRC;
4927     else
4928       SelectCCOp = PPC::SELECT_CC_VRRC;
4929 
4930     SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
4931                         getI32Imm(BROpc, dl) };
4932     CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
4933     return;
4934   }
4935   case ISD::VECTOR_SHUFFLE:
4936     if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
4937                                   N->getValueType(0) == MVT::v2i64)) {
4938       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
4939 
4940       SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
4941               Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
4942       unsigned DM[2];
4943 
4944       for (int i = 0; i < 2; ++i)
4945         if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
4946           DM[i] = 0;
4947         else
4948           DM[i] = 1;
4949 
4950       if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
4951           Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
4952           isa<LoadSDNode>(Op1.getOperand(0))) {
4953         LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
4954         SDValue Base, Offset;
4955 
4956         if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
4957             (LD->getMemoryVT() == MVT::f64 ||
4958              LD->getMemoryVT() == MVT::i64) &&
4959             SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
4960           SDValue Chain = LD->getChain();
4961           SDValue Ops[] = { Base, Offset, Chain };
4962           MachineMemOperand *MemOp = LD->getMemOperand();
4963           SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
4964                                               N->getValueType(0), Ops);
4965           CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
4966           return;
4967         }
4968       }
4969 
4970       // For little endian, we must swap the input operands and adjust
4971       // the mask elements (reverse and invert them).
4972       if (PPCSubTarget->isLittleEndian()) {
4973         std::swap(Op1, Op2);
4974         unsigned tmp = DM[0];
4975         DM[0] = 1 - DM[1];
4976         DM[1] = 1 - tmp;
4977       }
4978 
4979       SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
4980                                               MVT::i32);
4981       SDValue Ops[] = { Op1, Op2, DMV };
4982       CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
4983       return;
4984     }
4985 
4986     break;
4987   case PPCISD::BDNZ:
4988   case PPCISD::BDZ: {
4989     bool IsPPC64 = PPCSubTarget->isPPC64();
4990     SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
4991     CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
4992                                 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
4993                                 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
4994                          MVT::Other, Ops);
4995     return;
4996   }
4997   case PPCISD::COND_BRANCH: {
4998     // Op #0 is the Chain.
4999     // Op #1 is the PPC::PRED_* number.
5000     // Op #2 is the CR#
5001     // Op #3 is the Dest MBB
5002     // Op #4 is the Flag.
5003     // Prevent PPC::PRED_* from being selected into LI.
5004     unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
5005     if (EnableBranchHint)
5006       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3));
5007 
5008     SDValue Pred = getI32Imm(PCC, dl);
5009     SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
5010       N->getOperand(0), N->getOperand(4) };
5011     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
5012     return;
5013   }
5014   case ISD::BR_CC: {
5015     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
5016     unsigned PCC = getPredicateForSetCC(CC);
5017 
5018     if (N->getOperand(2).getValueType() == MVT::i1) {
5019       unsigned Opc;
5020       bool Swap;
5021       switch (PCC) {
5022       default: llvm_unreachable("Unexpected Boolean-operand predicate");
5023       case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true;  break;
5024       case PPC::PRED_LE: Opc = PPC::CRORC;  Swap = true;  break;
5025       case PPC::PRED_EQ: Opc = PPC::CREQV;  Swap = false; break;
5026       case PPC::PRED_GE: Opc = PPC::CRORC;  Swap = false; break;
5027       case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
5028       case PPC::PRED_NE: Opc = PPC::CRXOR;  Swap = false; break;
5029       }
5030 
5031       // A signed comparison of i1 values produces the opposite result to an
5032       // unsigned one if the condition code includes less-than or greater-than.
5033       // This is because 1 is the most negative signed i1 number and the most
5034       // positive unsigned i1 number. The CR-logical operations used for such
5035       // comparisons are non-commutative so for signed comparisons vs. unsigned
5036       // ones, the input operands just need to be swapped.
5037       if (ISD::isSignedIntSetCC(CC))
5038         Swap = !Swap;
5039 
5040       SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
5041                                              N->getOperand(Swap ? 3 : 2),
5042                                              N->getOperand(Swap ? 2 : 3)), 0);
5043       CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
5044                            N->getOperand(0));
5045       return;
5046     }
5047 
5048     if (EnableBranchHint)
5049       PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4));
5050 
5051     SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
5052     SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
5053                         N->getOperand(4), N->getOperand(0) };
5054     CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
5055     return;
5056   }
5057   case ISD::BRIND: {
5058     // FIXME: Should custom lower this.
5059     SDValue Chain = N->getOperand(0);
5060     SDValue Target = N->getOperand(1);
5061     unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
5062     unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
5063     Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
5064                                            Chain), 0);
5065     CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
5066     return;
5067   }
5068   case PPCISD::TOC_ENTRY: {
5069     assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) &&
5070             "Only supported for 64-bit ABI and 32-bit SVR4");
5071     if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
5072       SDValue GA = N->getOperand(0);
5073       SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
5074                                           N->getOperand(1));
5075       transferMemOperands(N, MN);
5076       ReplaceNode(N, MN);
5077       return;
5078     }
5079 
5080     // For medium and large code model, we generate two instructions as
5081     // described below.  Otherwise we allow SelectCodeCommon to handle this,
5082     // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA.
5083     CodeModel::Model CModel = TM.getCodeModel();
5084     if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
5085       break;
5086 
5087     // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
5088     // If it must be toc-referenced according to PPCSubTarget, we generate:
5089     //   LDtocL(@sym, ADDIStocHA(%x2, @sym))
5090     // Otherwise we generate:
5091     //   ADDItocL(ADDIStocHA(%x2, @sym), @sym)
5092     SDValue GA = N->getOperand(0);
5093     SDValue TOCbase = N->getOperand(1);
5094     SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
5095                                          TOCbase, GA);
5096     if (PPCLowering->isAccessedAsGotIndirect(GA)) {
5097       // If it is access as got-indirect, we need an extra LD to load
5098       // the address.
5099       SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
5100                                           SDValue(Tmp, 0));
5101       transferMemOperands(N, MN);
5102       ReplaceNode(N, MN);
5103       return;
5104     }
5105 
5106     // Build the address relative to the TOC-pointer..
5107     ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
5108                                           SDValue(Tmp, 0), GA));
5109     return;
5110   }
5111   case PPCISD::PPC32_PICGOT:
5112     // Generate a PIC-safe GOT reference.
5113     assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
5114       "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
5115     CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
5116                          PPCLowering->getPointerTy(CurDAG->getDataLayout()),
5117                          MVT::i32);
5118     return;
5119 
5120   case PPCISD::VADD_SPLAT: {
5121     // This expands into one of three sequences, depending on whether
5122     // the first operand is odd or even, positive or negative.
5123     assert(isa<ConstantSDNode>(N->getOperand(0)) &&
5124            isa<ConstantSDNode>(N->getOperand(1)) &&
5125            "Invalid operand on VADD_SPLAT!");
5126 
5127     int Elt     = N->getConstantOperandVal(0);
5128     int EltSize = N->getConstantOperandVal(1);
5129     unsigned Opc1, Opc2, Opc3;
5130     EVT VT;
5131 
5132     if (EltSize == 1) {
5133       Opc1 = PPC::VSPLTISB;
5134       Opc2 = PPC::VADDUBM;
5135       Opc3 = PPC::VSUBUBM;
5136       VT = MVT::v16i8;
5137     } else if (EltSize == 2) {
5138       Opc1 = PPC::VSPLTISH;
5139       Opc2 = PPC::VADDUHM;
5140       Opc3 = PPC::VSUBUHM;
5141       VT = MVT::v8i16;
5142     } else {
5143       assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
5144       Opc1 = PPC::VSPLTISW;
5145       Opc2 = PPC::VADDUWM;
5146       Opc3 = PPC::VSUBUWM;
5147       VT = MVT::v4i32;
5148     }
5149 
5150     if ((Elt & 1) == 0) {
5151       // Elt is even, in the range [-32,-18] + [16,30].
5152       //
5153       // Convert: VADD_SPLAT elt, size
5154       // Into:    tmp = VSPLTIS[BHW] elt
5155       //          VADDU[BHW]M tmp, tmp
5156       // Where:   [BHW] = B for size = 1, H for size = 2, W for size = 4
5157       SDValue EltVal = getI32Imm(Elt >> 1, dl);
5158       SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5159       SDValue TmpVal = SDValue(Tmp, 0);
5160       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
5161       return;
5162     } else if (Elt > 0) {
5163       // Elt is odd and positive, in the range [17,31].
5164       //
5165       // Convert: VADD_SPLAT elt, size
5166       // Into:    tmp1 = VSPLTIS[BHW] elt-16
5167       //          tmp2 = VSPLTIS[BHW] -16
5168       //          VSUBU[BHW]M tmp1, tmp2
5169       SDValue EltVal = getI32Imm(Elt - 16, dl);
5170       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5171       EltVal = getI32Imm(-16, dl);
5172       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5173       ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
5174                                             SDValue(Tmp2, 0)));
5175       return;
5176     } else {
5177       // Elt is odd and negative, in the range [-31,-17].
5178       //
5179       // Convert: VADD_SPLAT elt, size
5180       // Into:    tmp1 = VSPLTIS[BHW] elt+16
5181       //          tmp2 = VSPLTIS[BHW] -16
5182       //          VADDU[BHW]M tmp1, tmp2
5183       SDValue EltVal = getI32Imm(Elt + 16, dl);
5184       SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5185       EltVal = getI32Imm(-16, dl);
5186       SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
5187       ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
5188                                             SDValue(Tmp2, 0)));
5189       return;
5190     }
5191   }
5192   }
5193 
5194   SelectCode(N);
5195 }
5196 
5197 // If the target supports the cmpb instruction, do the idiom recognition here.
5198 // We don't do this as a DAG combine because we don't want to do it as nodes
5199 // are being combined (because we might miss part of the eventual idiom). We
5200 // don't want to do it during instruction selection because we want to reuse
5201 // the logic for lowering the masking operations already part of the
5202 // instruction selector.
5203 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
5204   SDLoc dl(N);
5205 
5206   assert(N->getOpcode() == ISD::OR &&
5207          "Only OR nodes are supported for CMPB");
5208 
5209   SDValue Res;
5210   if (!PPCSubTarget->hasCMPB())
5211     return Res;
5212 
5213   if (N->getValueType(0) != MVT::i32 &&
5214       N->getValueType(0) != MVT::i64)
5215     return Res;
5216 
5217   EVT VT = N->getValueType(0);
5218 
5219   SDValue RHS, LHS;
5220   bool BytesFound[8] = {false, false, false, false, false, false, false, false};
5221   uint64_t Mask = 0, Alt = 0;
5222 
5223   auto IsByteSelectCC = [this](SDValue O, unsigned &b,
5224                                uint64_t &Mask, uint64_t &Alt,
5225                                SDValue &LHS, SDValue &RHS) {
5226     if (O.getOpcode() != ISD::SELECT_CC)
5227       return false;
5228     ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
5229 
5230     if (!isa<ConstantSDNode>(O.getOperand(2)) ||
5231         !isa<ConstantSDNode>(O.getOperand(3)))
5232       return false;
5233 
5234     uint64_t PM = O.getConstantOperandVal(2);
5235     uint64_t PAlt = O.getConstantOperandVal(3);
5236     for (b = 0; b < 8; ++b) {
5237       uint64_t Mask = UINT64_C(0xFF) << (8*b);
5238       if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
5239         break;
5240     }
5241 
5242     if (b == 8)
5243       return false;
5244     Mask |= PM;
5245     Alt  |= PAlt;
5246 
5247     if (!isa<ConstantSDNode>(O.getOperand(1)) ||
5248         O.getConstantOperandVal(1) != 0) {
5249       SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
5250       if (Op0.getOpcode() == ISD::TRUNCATE)
5251         Op0 = Op0.getOperand(0);
5252       if (Op1.getOpcode() == ISD::TRUNCATE)
5253         Op1 = Op1.getOperand(0);
5254 
5255       if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
5256           Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
5257           isa<ConstantSDNode>(Op0.getOperand(1))) {
5258 
5259         unsigned Bits = Op0.getValueSizeInBits();
5260         if (b != Bits/8-1)
5261           return false;
5262         if (Op0.getConstantOperandVal(1) != Bits-8)
5263           return false;
5264 
5265         LHS = Op0.getOperand(0);
5266         RHS = Op1.getOperand(0);
5267         return true;
5268       }
5269 
5270       // When we have small integers (i16 to be specific), the form present
5271       // post-legalization uses SETULT in the SELECT_CC for the
5272       // higher-order byte, depending on the fact that the
5273       // even-higher-order bytes are known to all be zero, for example:
5274       //   select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
5275       // (so when the second byte is the same, because all higher-order
5276       // bits from bytes 3 and 4 are known to be zero, the result of the
5277       // xor can be at most 255)
5278       if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
5279           isa<ConstantSDNode>(O.getOperand(1))) {
5280 
5281         uint64_t ULim = O.getConstantOperandVal(1);
5282         if (ULim != (UINT64_C(1) << b*8))
5283           return false;
5284 
5285         // Now we need to make sure that the upper bytes are known to be
5286         // zero.
5287         unsigned Bits = Op0.getValueSizeInBits();
5288         if (!CurDAG->MaskedValueIsZero(
5289                 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
5290           return false;
5291 
5292         LHS = Op0.getOperand(0);
5293         RHS = Op0.getOperand(1);
5294         return true;
5295       }
5296 
5297       return false;
5298     }
5299 
5300     if (CC != ISD::SETEQ)
5301       return false;
5302 
5303     SDValue Op = O.getOperand(0);
5304     if (Op.getOpcode() == ISD::AND) {
5305       if (!isa<ConstantSDNode>(Op.getOperand(1)))
5306         return false;
5307       if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
5308         return false;
5309 
5310       SDValue XOR = Op.getOperand(0);
5311       if (XOR.getOpcode() == ISD::TRUNCATE)
5312         XOR = XOR.getOperand(0);
5313       if (XOR.getOpcode() != ISD::XOR)
5314         return false;
5315 
5316       LHS = XOR.getOperand(0);
5317       RHS = XOR.getOperand(1);
5318       return true;
5319     } else if (Op.getOpcode() == ISD::SRL) {
5320       if (!isa<ConstantSDNode>(Op.getOperand(1)))
5321         return false;
5322       unsigned Bits = Op.getValueSizeInBits();
5323       if (b != Bits/8-1)
5324         return false;
5325       if (Op.getConstantOperandVal(1) != Bits-8)
5326         return false;
5327 
5328       SDValue XOR = Op.getOperand(0);
5329       if (XOR.getOpcode() == ISD::TRUNCATE)
5330         XOR = XOR.getOperand(0);
5331       if (XOR.getOpcode() != ISD::XOR)
5332         return false;
5333 
5334       LHS = XOR.getOperand(0);
5335       RHS = XOR.getOperand(1);
5336       return true;
5337     }
5338 
5339     return false;
5340   };
5341 
5342   SmallVector<SDValue, 8> Queue(1, SDValue(N, 0));
5343   while (!Queue.empty()) {
5344     SDValue V = Queue.pop_back_val();
5345 
5346     for (const SDValue &O : V.getNode()->ops()) {
5347       unsigned b = 0;
5348       uint64_t M = 0, A = 0;
5349       SDValue OLHS, ORHS;
5350       if (O.getOpcode() == ISD::OR) {
5351         Queue.push_back(O);
5352       } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
5353         if (!LHS) {
5354           LHS = OLHS;
5355           RHS = ORHS;
5356           BytesFound[b] = true;
5357           Mask |= M;
5358           Alt  |= A;
5359         } else if ((LHS == ORHS && RHS == OLHS) ||
5360                    (RHS == ORHS && LHS == OLHS)) {
5361           BytesFound[b] = true;
5362           Mask |= M;
5363           Alt  |= A;
5364         } else {
5365           return Res;
5366         }
5367       } else {
5368         return Res;
5369       }
5370     }
5371   }
5372 
5373   unsigned LastB = 0, BCnt = 0;
5374   for (unsigned i = 0; i < 8; ++i)
5375     if (BytesFound[LastB]) {
5376       ++BCnt;
5377       LastB = i;
5378     }
5379 
5380   if (!LastB || BCnt < 2)
5381     return Res;
5382 
5383   // Because we'll be zero-extending the output anyway if don't have a specific
5384   // value for each input byte (via the Mask), we can 'anyext' the inputs.
5385   if (LHS.getValueType() != VT) {
5386     LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
5387     RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
5388   }
5389 
5390   Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
5391 
5392   bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
5393   if (NonTrivialMask && !Alt) {
5394     // Res = Mask & CMPB
5395     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5396                           CurDAG->getConstant(Mask, dl, VT));
5397   } else if (Alt) {
5398     // Res = (CMPB & Mask) | (~CMPB & Alt)
5399     // Which, as suggested here:
5400     //   https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
5401     // can be written as:
5402     // Res = Alt ^ ((Alt ^ Mask) & CMPB)
5403     // useful because the (Alt ^ Mask) can be pre-computed.
5404     Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
5405                           CurDAG->getConstant(Mask ^ Alt, dl, VT));
5406     Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
5407                           CurDAG->getConstant(Alt, dl, VT));
5408   }
5409 
5410   return Res;
5411 }
5412 
5413 // When CR bit registers are enabled, an extension of an i1 variable to a i32
5414 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
5415 // involves constant materialization of a 0 or a 1 or both. If the result of
5416 // the extension is then operated upon by some operator that can be constant
5417 // folded with a constant 0 or 1, and that constant can be materialized using
5418 // only one instruction (like a zero or one), then we should fold in those
5419 // operations with the select.
5420 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
5421   if (!PPCSubTarget->useCRBits())
5422     return;
5423 
5424   if (N->getOpcode() != ISD::ZERO_EXTEND &&
5425       N->getOpcode() != ISD::SIGN_EXTEND &&
5426       N->getOpcode() != ISD::ANY_EXTEND)
5427     return;
5428 
5429   if (N->getOperand(0).getValueType() != MVT::i1)
5430     return;
5431 
5432   if (!N->hasOneUse())
5433     return;
5434 
5435   SDLoc dl(N);
5436   EVT VT = N->getValueType(0);
5437   SDValue Cond = N->getOperand(0);
5438   SDValue ConstTrue =
5439     CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
5440   SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
5441 
5442   do {
5443     SDNode *User = *N->use_begin();
5444     if (User->getNumOperands() != 2)
5445       break;
5446 
5447     auto TryFold = [this, N, User, dl](SDValue Val) {
5448       SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
5449       SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
5450       SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
5451 
5452       return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
5453                                             User->getValueType(0),
5454                                             O0.getNode(), O1.getNode());
5455     };
5456 
5457     // FIXME: When the semantics of the interaction between select and undef
5458     // are clearly defined, it may turn out to be unnecessary to break here.
5459     SDValue TrueRes = TryFold(ConstTrue);
5460     if (!TrueRes || TrueRes.isUndef())
5461       break;
5462     SDValue FalseRes = TryFold(ConstFalse);
5463     if (!FalseRes || FalseRes.isUndef())
5464       break;
5465 
5466     // For us to materialize these using one instruction, we must be able to
5467     // represent them as signed 16-bit integers.
5468     uint64_t True  = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
5469              False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
5470     if (!isInt<16>(True) || !isInt<16>(False))
5471       break;
5472 
5473     // We can replace User with a new SELECT node, and try again to see if we
5474     // can fold the select with its user.
5475     Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
5476     N = User;
5477     ConstTrue = TrueRes;
5478     ConstFalse = FalseRes;
5479   } while (N->hasOneUse());
5480 }
5481 
5482 void PPCDAGToDAGISel::PreprocessISelDAG() {
5483   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
5484 
5485   bool MadeChange = false;
5486   while (Position != CurDAG->allnodes_begin()) {
5487     SDNode *N = &*--Position;
5488     if (N->use_empty())
5489       continue;
5490 
5491     SDValue Res;
5492     switch (N->getOpcode()) {
5493     default: break;
5494     case ISD::OR:
5495       Res = combineToCMPB(N);
5496       break;
5497     }
5498 
5499     if (!Res)
5500       foldBoolExts(Res, N);
5501 
5502     if (Res) {
5503       LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld:    ");
5504       LLVM_DEBUG(N->dump(CurDAG));
5505       LLVM_DEBUG(dbgs() << "\nNew: ");
5506       LLVM_DEBUG(Res.getNode()->dump(CurDAG));
5507       LLVM_DEBUG(dbgs() << "\n");
5508 
5509       CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
5510       MadeChange = true;
5511     }
5512   }
5513 
5514   if (MadeChange)
5515     CurDAG->RemoveDeadNodes();
5516 }
5517 
5518 /// PostprocessISelDAG - Perform some late peephole optimizations
5519 /// on the DAG representation.
5520 void PPCDAGToDAGISel::PostprocessISelDAG() {
5521   // Skip peepholes at -O0.
5522   if (TM.getOptLevel() == CodeGenOpt::None)
5523     return;
5524 
5525   PeepholePPC64();
5526   PeepholeCROps();
5527   PeepholePPC64ZExt();
5528 }
5529 
5530 // Check if all users of this node will become isel where the second operand
5531 // is the constant zero. If this is so, and if we can negate the condition,
5532 // then we can flip the true and false operands. This will allow the zero to
5533 // be folded with the isel so that we don't need to materialize a register
5534 // containing zero.
5535 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
5536   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5537        UI != UE; ++UI) {
5538     SDNode *User = *UI;
5539     if (!User->isMachineOpcode())
5540       return false;
5541     if (User->getMachineOpcode() != PPC::SELECT_I4 &&
5542         User->getMachineOpcode() != PPC::SELECT_I8)
5543       return false;
5544 
5545     SDNode *Op2 = User->getOperand(2).getNode();
5546     if (!Op2->isMachineOpcode())
5547       return false;
5548 
5549     if (Op2->getMachineOpcode() != PPC::LI &&
5550         Op2->getMachineOpcode() != PPC::LI8)
5551       return false;
5552 
5553     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
5554     if (!C)
5555       return false;
5556 
5557     if (!C->isNullValue())
5558       return false;
5559   }
5560 
5561   return true;
5562 }
5563 
5564 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
5565   SmallVector<SDNode *, 4> ToReplace;
5566   for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5567        UI != UE; ++UI) {
5568     SDNode *User = *UI;
5569     assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
5570             User->getMachineOpcode() == PPC::SELECT_I8) &&
5571            "Must have all select users");
5572     ToReplace.push_back(User);
5573   }
5574 
5575   for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
5576        UE = ToReplace.end(); UI != UE; ++UI) {
5577     SDNode *User = *UI;
5578     SDNode *ResNode =
5579       CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
5580                              User->getValueType(0), User->getOperand(0),
5581                              User->getOperand(2),
5582                              User->getOperand(1));
5583 
5584     LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
5585     LLVM_DEBUG(User->dump(CurDAG));
5586     LLVM_DEBUG(dbgs() << "\nNew: ");
5587     LLVM_DEBUG(ResNode->dump(CurDAG));
5588     LLVM_DEBUG(dbgs() << "\n");
5589 
5590     ReplaceUses(User, ResNode);
5591   }
5592 }
5593 
5594 void PPCDAGToDAGISel::PeepholeCROps() {
5595   bool IsModified;
5596   do {
5597     IsModified = false;
5598     for (SDNode &Node : CurDAG->allnodes()) {
5599       MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
5600       if (!MachineNode || MachineNode->use_empty())
5601         continue;
5602       SDNode *ResNode = MachineNode;
5603 
5604       bool Op1Set   = false, Op1Unset = false,
5605            Op1Not   = false,
5606            Op2Set   = false, Op2Unset = false,
5607            Op2Not   = false;
5608 
5609       unsigned Opcode = MachineNode->getMachineOpcode();
5610       switch (Opcode) {
5611       default: break;
5612       case PPC::CRAND:
5613       case PPC::CRNAND:
5614       case PPC::CROR:
5615       case PPC::CRXOR:
5616       case PPC::CRNOR:
5617       case PPC::CREQV:
5618       case PPC::CRANDC:
5619       case PPC::CRORC: {
5620         SDValue Op = MachineNode->getOperand(1);
5621         if (Op.isMachineOpcode()) {
5622           if (Op.getMachineOpcode() == PPC::CRSET)
5623             Op2Set = true;
5624           else if (Op.getMachineOpcode() == PPC::CRUNSET)
5625             Op2Unset = true;
5626           else if (Op.getMachineOpcode() == PPC::CRNOR &&
5627                    Op.getOperand(0) == Op.getOperand(1))
5628             Op2Not = true;
5629         }
5630         LLVM_FALLTHROUGH;
5631       }
5632       case PPC::BC:
5633       case PPC::BCn:
5634       case PPC::SELECT_I4:
5635       case PPC::SELECT_I8:
5636       case PPC::SELECT_F4:
5637       case PPC::SELECT_F8:
5638       case PPC::SELECT_QFRC:
5639       case PPC::SELECT_QSRC:
5640       case PPC::SELECT_QBRC:
5641       case PPC::SELECT_SPE:
5642       case PPC::SELECT_SPE4:
5643       case PPC::SELECT_VRRC:
5644       case PPC::SELECT_VSFRC:
5645       case PPC::SELECT_VSSRC:
5646       case PPC::SELECT_VSRC: {
5647         SDValue Op = MachineNode->getOperand(0);
5648         if (Op.isMachineOpcode()) {
5649           if (Op.getMachineOpcode() == PPC::CRSET)
5650             Op1Set = true;
5651           else if (Op.getMachineOpcode() == PPC::CRUNSET)
5652             Op1Unset = true;
5653           else if (Op.getMachineOpcode() == PPC::CRNOR &&
5654                    Op.getOperand(0) == Op.getOperand(1))
5655             Op1Not = true;
5656         }
5657         }
5658         break;
5659       }
5660 
5661       bool SelectSwap = false;
5662       switch (Opcode) {
5663       default: break;
5664       case PPC::CRAND:
5665         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5666           // x & x = x
5667           ResNode = MachineNode->getOperand(0).getNode();
5668         else if (Op1Set)
5669           // 1 & y = y
5670           ResNode = MachineNode->getOperand(1).getNode();
5671         else if (Op2Set)
5672           // x & 1 = x
5673           ResNode = MachineNode->getOperand(0).getNode();
5674         else if (Op1Unset || Op2Unset)
5675           // x & 0 = 0 & y = 0
5676           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5677                                            MVT::i1);
5678         else if (Op1Not)
5679           // ~x & y = andc(y, x)
5680           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5681                                            MVT::i1, MachineNode->getOperand(1),
5682                                            MachineNode->getOperand(0).
5683                                              getOperand(0));
5684         else if (Op2Not)
5685           // x & ~y = andc(x, y)
5686           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5687                                            MVT::i1, MachineNode->getOperand(0),
5688                                            MachineNode->getOperand(1).
5689                                              getOperand(0));
5690         else if (AllUsersSelectZero(MachineNode)) {
5691           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5692                                            MVT::i1, MachineNode->getOperand(0),
5693                                            MachineNode->getOperand(1));
5694           SelectSwap = true;
5695         }
5696         break;
5697       case PPC::CRNAND:
5698         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5699           // nand(x, x) -> nor(x, x)
5700           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5701                                            MVT::i1, MachineNode->getOperand(0),
5702                                            MachineNode->getOperand(0));
5703         else if (Op1Set)
5704           // nand(1, y) -> nor(y, y)
5705           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5706                                            MVT::i1, MachineNode->getOperand(1),
5707                                            MachineNode->getOperand(1));
5708         else if (Op2Set)
5709           // nand(x, 1) -> nor(x, x)
5710           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5711                                            MVT::i1, MachineNode->getOperand(0),
5712                                            MachineNode->getOperand(0));
5713         else if (Op1Unset || Op2Unset)
5714           // nand(x, 0) = nand(0, y) = 1
5715           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5716                                            MVT::i1);
5717         else if (Op1Not)
5718           // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
5719           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5720                                            MVT::i1, MachineNode->getOperand(0).
5721                                                       getOperand(0),
5722                                            MachineNode->getOperand(1));
5723         else if (Op2Not)
5724           // nand(x, ~y) = ~x | y = orc(y, x)
5725           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5726                                            MVT::i1, MachineNode->getOperand(1).
5727                                                       getOperand(0),
5728                                            MachineNode->getOperand(0));
5729         else if (AllUsersSelectZero(MachineNode)) {
5730           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5731                                            MVT::i1, MachineNode->getOperand(0),
5732                                            MachineNode->getOperand(1));
5733           SelectSwap = true;
5734         }
5735         break;
5736       case PPC::CROR:
5737         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5738           // x | x = x
5739           ResNode = MachineNode->getOperand(0).getNode();
5740         else if (Op1Set || Op2Set)
5741           // x | 1 = 1 | y = 1
5742           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5743                                            MVT::i1);
5744         else if (Op1Unset)
5745           // 0 | y = y
5746           ResNode = MachineNode->getOperand(1).getNode();
5747         else if (Op2Unset)
5748           // x | 0 = x
5749           ResNode = MachineNode->getOperand(0).getNode();
5750         else if (Op1Not)
5751           // ~x | y = orc(y, x)
5752           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5753                                            MVT::i1, MachineNode->getOperand(1),
5754                                            MachineNode->getOperand(0).
5755                                              getOperand(0));
5756         else if (Op2Not)
5757           // x | ~y = orc(x, y)
5758           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5759                                            MVT::i1, MachineNode->getOperand(0),
5760                                            MachineNode->getOperand(1).
5761                                              getOperand(0));
5762         else if (AllUsersSelectZero(MachineNode)) {
5763           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5764                                            MVT::i1, MachineNode->getOperand(0),
5765                                            MachineNode->getOperand(1));
5766           SelectSwap = true;
5767         }
5768         break;
5769       case PPC::CRXOR:
5770         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5771           // xor(x, x) = 0
5772           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5773                                            MVT::i1);
5774         else if (Op1Set)
5775           // xor(1, y) -> nor(y, y)
5776           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5777                                            MVT::i1, MachineNode->getOperand(1),
5778                                            MachineNode->getOperand(1));
5779         else if (Op2Set)
5780           // xor(x, 1) -> nor(x, x)
5781           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5782                                            MVT::i1, MachineNode->getOperand(0),
5783                                            MachineNode->getOperand(0));
5784         else if (Op1Unset)
5785           // xor(0, y) = y
5786           ResNode = MachineNode->getOperand(1).getNode();
5787         else if (Op2Unset)
5788           // xor(x, 0) = x
5789           ResNode = MachineNode->getOperand(0).getNode();
5790         else if (Op1Not)
5791           // xor(~x, y) = eqv(x, y)
5792           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5793                                            MVT::i1, MachineNode->getOperand(0).
5794                                                       getOperand(0),
5795                                            MachineNode->getOperand(1));
5796         else if (Op2Not)
5797           // xor(x, ~y) = eqv(x, y)
5798           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5799                                            MVT::i1, MachineNode->getOperand(0),
5800                                            MachineNode->getOperand(1).
5801                                              getOperand(0));
5802         else if (AllUsersSelectZero(MachineNode)) {
5803           ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
5804                                            MVT::i1, MachineNode->getOperand(0),
5805                                            MachineNode->getOperand(1));
5806           SelectSwap = true;
5807         }
5808         break;
5809       case PPC::CRNOR:
5810         if (Op1Set || Op2Set)
5811           // nor(1, y) -> 0
5812           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5813                                            MVT::i1);
5814         else if (Op1Unset)
5815           // nor(0, y) = ~y -> nor(y, y)
5816           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5817                                            MVT::i1, MachineNode->getOperand(1),
5818                                            MachineNode->getOperand(1));
5819         else if (Op2Unset)
5820           // nor(x, 0) = ~x
5821           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5822                                            MVT::i1, MachineNode->getOperand(0),
5823                                            MachineNode->getOperand(0));
5824         else if (Op1Not)
5825           // nor(~x, y) = andc(x, y)
5826           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5827                                            MVT::i1, MachineNode->getOperand(0).
5828                                                       getOperand(0),
5829                                            MachineNode->getOperand(1));
5830         else if (Op2Not)
5831           // nor(x, ~y) = andc(y, x)
5832           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5833                                            MVT::i1, MachineNode->getOperand(1).
5834                                                       getOperand(0),
5835                                            MachineNode->getOperand(0));
5836         else if (AllUsersSelectZero(MachineNode)) {
5837           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5838                                            MVT::i1, MachineNode->getOperand(0),
5839                                            MachineNode->getOperand(1));
5840           SelectSwap = true;
5841         }
5842         break;
5843       case PPC::CREQV:
5844         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5845           // eqv(x, x) = 1
5846           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5847                                            MVT::i1);
5848         else if (Op1Set)
5849           // eqv(1, y) = y
5850           ResNode = MachineNode->getOperand(1).getNode();
5851         else if (Op2Set)
5852           // eqv(x, 1) = x
5853           ResNode = MachineNode->getOperand(0).getNode();
5854         else if (Op1Unset)
5855           // eqv(0, y) = ~y -> nor(y, y)
5856           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5857                                            MVT::i1, MachineNode->getOperand(1),
5858                                            MachineNode->getOperand(1));
5859         else if (Op2Unset)
5860           // eqv(x, 0) = ~x
5861           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5862                                            MVT::i1, MachineNode->getOperand(0),
5863                                            MachineNode->getOperand(0));
5864         else if (Op1Not)
5865           // eqv(~x, y) = xor(x, y)
5866           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5867                                            MVT::i1, MachineNode->getOperand(0).
5868                                                       getOperand(0),
5869                                            MachineNode->getOperand(1));
5870         else if (Op2Not)
5871           // eqv(x, ~y) = xor(x, y)
5872           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5873                                            MVT::i1, MachineNode->getOperand(0),
5874                                            MachineNode->getOperand(1).
5875                                              getOperand(0));
5876         else if (AllUsersSelectZero(MachineNode)) {
5877           ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
5878                                            MVT::i1, MachineNode->getOperand(0),
5879                                            MachineNode->getOperand(1));
5880           SelectSwap = true;
5881         }
5882         break;
5883       case PPC::CRANDC:
5884         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5885           // andc(x, x) = 0
5886           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5887                                            MVT::i1);
5888         else if (Op1Set)
5889           // andc(1, y) = ~y
5890           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5891                                            MVT::i1, MachineNode->getOperand(1),
5892                                            MachineNode->getOperand(1));
5893         else if (Op1Unset || Op2Set)
5894           // andc(0, y) = andc(x, 1) = 0
5895           ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
5896                                            MVT::i1);
5897         else if (Op2Unset)
5898           // andc(x, 0) = x
5899           ResNode = MachineNode->getOperand(0).getNode();
5900         else if (Op1Not)
5901           // andc(~x, y) = ~(x | y) = nor(x, y)
5902           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5903                                            MVT::i1, MachineNode->getOperand(0).
5904                                                       getOperand(0),
5905                                            MachineNode->getOperand(1));
5906         else if (Op2Not)
5907           // andc(x, ~y) = x & y
5908           ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
5909                                            MVT::i1, MachineNode->getOperand(0),
5910                                            MachineNode->getOperand(1).
5911                                              getOperand(0));
5912         else if (AllUsersSelectZero(MachineNode)) {
5913           ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
5914                                            MVT::i1, MachineNode->getOperand(1),
5915                                            MachineNode->getOperand(0));
5916           SelectSwap = true;
5917         }
5918         break;
5919       case PPC::CRORC:
5920         if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
5921           // orc(x, x) = 1
5922           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5923                                            MVT::i1);
5924         else if (Op1Set || Op2Unset)
5925           // orc(1, y) = orc(x, 0) = 1
5926           ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
5927                                            MVT::i1);
5928         else if (Op2Set)
5929           // orc(x, 1) = x
5930           ResNode = MachineNode->getOperand(0).getNode();
5931         else if (Op1Unset)
5932           // orc(0, y) = ~y
5933           ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
5934                                            MVT::i1, MachineNode->getOperand(1),
5935                                            MachineNode->getOperand(1));
5936         else if (Op1Not)
5937           // orc(~x, y) = ~(x & y) = nand(x, y)
5938           ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
5939                                            MVT::i1, MachineNode->getOperand(0).
5940                                                       getOperand(0),
5941                                            MachineNode->getOperand(1));
5942         else if (Op2Not)
5943           // orc(x, ~y) = x | y
5944           ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
5945                                            MVT::i1, MachineNode->getOperand(0),
5946                                            MachineNode->getOperand(1).
5947                                              getOperand(0));
5948         else if (AllUsersSelectZero(MachineNode)) {
5949           ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
5950                                            MVT::i1, MachineNode->getOperand(1),
5951                                            MachineNode->getOperand(0));
5952           SelectSwap = true;
5953         }
5954         break;
5955       case PPC::SELECT_I4:
5956       case PPC::SELECT_I8:
5957       case PPC::SELECT_F4:
5958       case PPC::SELECT_F8:
5959       case PPC::SELECT_QFRC:
5960       case PPC::SELECT_QSRC:
5961       case PPC::SELECT_QBRC:
5962       case PPC::SELECT_SPE:
5963       case PPC::SELECT_SPE4:
5964       case PPC::SELECT_VRRC:
5965       case PPC::SELECT_VSFRC:
5966       case PPC::SELECT_VSSRC:
5967       case PPC::SELECT_VSRC:
5968         if (Op1Set)
5969           ResNode = MachineNode->getOperand(1).getNode();
5970         else if (Op1Unset)
5971           ResNode = MachineNode->getOperand(2).getNode();
5972         else if (Op1Not)
5973           ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
5974                                            SDLoc(MachineNode),
5975                                            MachineNode->getValueType(0),
5976                                            MachineNode->getOperand(0).
5977                                              getOperand(0),
5978                                            MachineNode->getOperand(2),
5979                                            MachineNode->getOperand(1));
5980         break;
5981       case PPC::BC:
5982       case PPC::BCn:
5983         if (Op1Not)
5984           ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
5985                                                                PPC::BC,
5986                                            SDLoc(MachineNode),
5987                                            MVT::Other,
5988                                            MachineNode->getOperand(0).
5989                                              getOperand(0),
5990                                            MachineNode->getOperand(1),
5991                                            MachineNode->getOperand(2));
5992         // FIXME: Handle Op1Set, Op1Unset here too.
5993         break;
5994       }
5995 
5996       // If we're inverting this node because it is used only by selects that
5997       // we'd like to swap, then swap the selects before the node replacement.
5998       if (SelectSwap)
5999         SwapAllSelectUsers(MachineNode);
6000 
6001       if (ResNode != MachineNode) {
6002         LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld:    ");
6003         LLVM_DEBUG(MachineNode->dump(CurDAG));
6004         LLVM_DEBUG(dbgs() << "\nNew: ");
6005         LLVM_DEBUG(ResNode->dump(CurDAG));
6006         LLVM_DEBUG(dbgs() << "\n");
6007 
6008         ReplaceUses(MachineNode, ResNode);
6009         IsModified = true;
6010       }
6011     }
6012     if (IsModified)
6013       CurDAG->RemoveDeadNodes();
6014   } while (IsModified);
6015 }
6016 
6017 // Gather the set of 32-bit operations that are known to have their
6018 // higher-order 32 bits zero, where ToPromote contains all such operations.
6019 static bool PeepholePPC64ZExtGather(SDValue Op32,
6020                                     SmallPtrSetImpl<SDNode *> &ToPromote) {
6021   if (!Op32.isMachineOpcode())
6022     return false;
6023 
6024   // First, check for the "frontier" instructions (those that will clear the
6025   // higher-order 32 bits.
6026 
6027   // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
6028   // around. If it does not, then these instructions will clear the
6029   // higher-order bits.
6030   if ((Op32.getMachineOpcode() == PPC::RLWINM ||
6031        Op32.getMachineOpcode() == PPC::RLWNM) &&
6032       Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
6033     ToPromote.insert(Op32.getNode());
6034     return true;
6035   }
6036 
6037   // SLW and SRW always clear the higher-order bits.
6038   if (Op32.getMachineOpcode() == PPC::SLW ||
6039       Op32.getMachineOpcode() == PPC::SRW) {
6040     ToPromote.insert(Op32.getNode());
6041     return true;
6042   }
6043 
6044   // For LI and LIS, we need the immediate to be positive (so that it is not
6045   // sign extended).
6046   if (Op32.getMachineOpcode() == PPC::LI ||
6047       Op32.getMachineOpcode() == PPC::LIS) {
6048     if (!isUInt<15>(Op32.getConstantOperandVal(0)))
6049       return false;
6050 
6051     ToPromote.insert(Op32.getNode());
6052     return true;
6053   }
6054 
6055   // LHBRX and LWBRX always clear the higher-order bits.
6056   if (Op32.getMachineOpcode() == PPC::LHBRX ||
6057       Op32.getMachineOpcode() == PPC::LWBRX) {
6058     ToPromote.insert(Op32.getNode());
6059     return true;
6060   }
6061 
6062   // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
6063   if (Op32.getMachineOpcode() == PPC::CNTLZW ||
6064       Op32.getMachineOpcode() == PPC::CNTTZW) {
6065     ToPromote.insert(Op32.getNode());
6066     return true;
6067   }
6068 
6069   // Next, check for those instructions we can look through.
6070 
6071   // Assuming the mask does not wrap around, then the higher-order bits are
6072   // taken directly from the first operand.
6073   if (Op32.getMachineOpcode() == PPC::RLWIMI &&
6074       Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
6075     SmallPtrSet<SDNode *, 16> ToPromote1;
6076     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
6077       return false;
6078 
6079     ToPromote.insert(Op32.getNode());
6080     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6081     return true;
6082   }
6083 
6084   // For OR, the higher-order bits are zero if that is true for both operands.
6085   // For SELECT_I4, the same is true (but the relevant operand numbers are
6086   // shifted by 1).
6087   if (Op32.getMachineOpcode() == PPC::OR ||
6088       Op32.getMachineOpcode() == PPC::SELECT_I4) {
6089     unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
6090     SmallPtrSet<SDNode *, 16> ToPromote1;
6091     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
6092       return false;
6093     if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
6094       return false;
6095 
6096     ToPromote.insert(Op32.getNode());
6097     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6098     return true;
6099   }
6100 
6101   // For ORI and ORIS, we need the higher-order bits of the first operand to be
6102   // zero, and also for the constant to be positive (so that it is not sign
6103   // extended).
6104   if (Op32.getMachineOpcode() == PPC::ORI ||
6105       Op32.getMachineOpcode() == PPC::ORIS) {
6106     SmallPtrSet<SDNode *, 16> ToPromote1;
6107     if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
6108       return false;
6109     if (!isUInt<15>(Op32.getConstantOperandVal(1)))
6110       return false;
6111 
6112     ToPromote.insert(Op32.getNode());
6113     ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6114     return true;
6115   }
6116 
6117   // The higher-order bits of AND are zero if that is true for at least one of
6118   // the operands.
6119   if (Op32.getMachineOpcode() == PPC::AND) {
6120     SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
6121     bool Op0OK =
6122       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
6123     bool Op1OK =
6124       PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
6125     if (!Op0OK && !Op1OK)
6126       return false;
6127 
6128     ToPromote.insert(Op32.getNode());
6129 
6130     if (Op0OK)
6131       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6132 
6133     if (Op1OK)
6134       ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
6135 
6136     return true;
6137   }
6138 
6139   // For ANDI and ANDIS, the higher-order bits are zero if either that is true
6140   // of the first operand, or if the second operand is positive (so that it is
6141   // not sign extended).
6142   if (Op32.getMachineOpcode() == PPC::ANDIo ||
6143       Op32.getMachineOpcode() == PPC::ANDISo) {
6144     SmallPtrSet<SDNode *, 16> ToPromote1;
6145     bool Op0OK =
6146       PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
6147     bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
6148     if (!Op0OK && !Op1OK)
6149       return false;
6150 
6151     ToPromote.insert(Op32.getNode());
6152 
6153     if (Op0OK)
6154       ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
6155 
6156     return true;
6157   }
6158 
6159   return false;
6160 }
6161 
6162 void PPCDAGToDAGISel::PeepholePPC64ZExt() {
6163   if (!PPCSubTarget->isPPC64())
6164     return;
6165 
6166   // When we zero-extend from i32 to i64, we use a pattern like this:
6167   // def : Pat<(i64 (zext i32:$in)),
6168   //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
6169   //                   0, 32)>;
6170   // There are several 32-bit shift/rotate instructions, however, that will
6171   // clear the higher-order bits of their output, rendering the RLDICL
6172   // unnecessary. When that happens, we remove it here, and redefine the
6173   // relevant 32-bit operation to be a 64-bit operation.
6174 
6175   SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6176 
6177   bool MadeChange = false;
6178   while (Position != CurDAG->allnodes_begin()) {
6179     SDNode *N = &*--Position;
6180     // Skip dead nodes and any non-machine opcodes.
6181     if (N->use_empty() || !N->isMachineOpcode())
6182       continue;
6183 
6184     if (N->getMachineOpcode() != PPC::RLDICL)
6185       continue;
6186 
6187     if (N->getConstantOperandVal(1) != 0 ||
6188         N->getConstantOperandVal(2) != 32)
6189       continue;
6190 
6191     SDValue ISR = N->getOperand(0);
6192     if (!ISR.isMachineOpcode() ||
6193         ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
6194       continue;
6195 
6196     if (!ISR.hasOneUse())
6197       continue;
6198 
6199     if (ISR.getConstantOperandVal(2) != PPC::sub_32)
6200       continue;
6201 
6202     SDValue IDef = ISR.getOperand(0);
6203     if (!IDef.isMachineOpcode() ||
6204         IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
6205       continue;
6206 
6207     // We now know that we're looking at a canonical i32 -> i64 zext. See if we
6208     // can get rid of it.
6209 
6210     SDValue Op32 = ISR->getOperand(1);
6211     if (!Op32.isMachineOpcode())
6212       continue;
6213 
6214     // There are some 32-bit instructions that always clear the high-order 32
6215     // bits, there are also some instructions (like AND) that we can look
6216     // through.
6217     SmallPtrSet<SDNode *, 16> ToPromote;
6218     if (!PeepholePPC64ZExtGather(Op32, ToPromote))
6219       continue;
6220 
6221     // If the ToPromote set contains nodes that have uses outside of the set
6222     // (except for the original INSERT_SUBREG), then abort the transformation.
6223     bool OutsideUse = false;
6224     for (SDNode *PN : ToPromote) {
6225       for (SDNode *UN : PN->uses()) {
6226         if (!ToPromote.count(UN) && UN != ISR.getNode()) {
6227           OutsideUse = true;
6228           break;
6229         }
6230       }
6231 
6232       if (OutsideUse)
6233         break;
6234     }
6235     if (OutsideUse)
6236       continue;
6237 
6238     MadeChange = true;
6239 
6240     // We now know that this zero extension can be removed by promoting to
6241     // nodes in ToPromote to 64-bit operations, where for operations in the
6242     // frontier of the set, we need to insert INSERT_SUBREGs for their
6243     // operands.
6244     for (SDNode *PN : ToPromote) {
6245       unsigned NewOpcode;
6246       switch (PN->getMachineOpcode()) {
6247       default:
6248         llvm_unreachable("Don't know the 64-bit variant of this instruction");
6249       case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
6250       case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
6251       case PPC::SLW:       NewOpcode = PPC::SLW8; break;
6252       case PPC::SRW:       NewOpcode = PPC::SRW8; break;
6253       case PPC::LI:        NewOpcode = PPC::LI8; break;
6254       case PPC::LIS:       NewOpcode = PPC::LIS8; break;
6255       case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
6256       case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
6257       case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
6258       case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
6259       case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
6260       case PPC::OR:        NewOpcode = PPC::OR8; break;
6261       case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
6262       case PPC::ORI:       NewOpcode = PPC::ORI8; break;
6263       case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
6264       case PPC::AND:       NewOpcode = PPC::AND8; break;
6265       case PPC::ANDIo:     NewOpcode = PPC::ANDIo8; break;
6266       case PPC::ANDISo:    NewOpcode = PPC::ANDISo8; break;
6267       }
6268 
6269       // Note: During the replacement process, the nodes will be in an
6270       // inconsistent state (some instructions will have operands with values
6271       // of the wrong type). Once done, however, everything should be right
6272       // again.
6273 
6274       SmallVector<SDValue, 4> Ops;
6275       for (const SDValue &V : PN->ops()) {
6276         if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
6277             !isa<ConstantSDNode>(V)) {
6278           SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
6279           SDNode *ReplOp =
6280             CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
6281                                    ISR.getNode()->getVTList(), ReplOpOps);
6282           Ops.push_back(SDValue(ReplOp, 0));
6283         } else {
6284           Ops.push_back(V);
6285         }
6286       }
6287 
6288       // Because all to-be-promoted nodes only have users that are other
6289       // promoted nodes (or the original INSERT_SUBREG), we can safely replace
6290       // the i32 result value type with i64.
6291 
6292       SmallVector<EVT, 2> NewVTs;
6293       SDVTList VTs = PN->getVTList();
6294       for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
6295         if (VTs.VTs[i] == MVT::i32)
6296           NewVTs.push_back(MVT::i64);
6297         else
6298           NewVTs.push_back(VTs.VTs[i]);
6299 
6300       LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
6301       LLVM_DEBUG(PN->dump(CurDAG));
6302 
6303       CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
6304 
6305       LLVM_DEBUG(dbgs() << "\nNew: ");
6306       LLVM_DEBUG(PN->dump(CurDAG));
6307       LLVM_DEBUG(dbgs() << "\n");
6308     }
6309 
6310     // Now we replace the original zero extend and its associated INSERT_SUBREG
6311     // with the value feeding the INSERT_SUBREG (which has now been promoted to
6312     // return an i64).
6313 
6314     LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
6315     LLVM_DEBUG(N->dump(CurDAG));
6316     LLVM_DEBUG(dbgs() << "\nNew: ");
6317     LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
6318     LLVM_DEBUG(dbgs() << "\n");
6319 
6320     ReplaceUses(N, Op32.getNode());
6321   }
6322 
6323   if (MadeChange)
6324     CurDAG->RemoveDeadNodes();
6325 }
6326 
// Peephole over the selected DAG that folds an add-immediate feeding the base
// address of a load or store into the memory operation's own displacement
// operand. Where needed, the relocation flags implied by the add-immediate's
// opcode are attached to the folded immediate, and the add-immediate is
// removed once it has no remaining users.
void PPCDAGToDAGISel::PeepholePPC64() {
  // These optimizations are currently supported only for 64-bit SVR4.
  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
    return;

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  // Visit the nodes from the end of the node list towards the beginning.
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // FirstOp is the index of N's displacement operand; the base address is
    // the operand right after it (FirstOp + 1).
    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    // Set for the opcodes whose displacement must be a multiple of 4.
    bool RequiresMod4Offset = false;

    // Only the loads and stores listed here are candidates; anything else
    // moves on to the next node.
    switch (StorageOpcode) {
    default: continue;

    case PPC::LWA:
    case PPC::LD:
    case PPC::DFLOADf64:
    case PPC::DFLOADf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWZ:
    case PPC::LWZ8:
      // Loads: the displacement is operand 0.
      FirstOp = 0;
      break;

    case PPC::STD:
    case PPC::DFSTOREf64:
    case PPC::DFSTOREf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::STB:
    case PPC::STB8:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      // Stores: the displacement is operand 1 (operand 0 is carried over
      // unchanged below; presumably it is the value being stored).
      FirstOp = 1;
      break;
    }

    // If this is a load or store with a zero offset, or within the alignment,
    // we may be able to fold an add-immediate into the memory operation.
    // The check against alignment is below, as it can't occur until we check
    // the arguments to N
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    // Flags holds the relocation flags to put on the folded immediate;
    // ReplaceFlags says whether they need to be attached at all.
    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there.  A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      // For these cases, the immediate may not be divisible by 4, in
      // which case the fold is illegal for DS-form instructions.  (The
      // other cases provide aligned addresses and are always safe.)
      if (RequiresMod4Offset &&
          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
           Base.getConstantOperandVal(1) % 4 != 0))
        continue;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL:
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    // The immediate (or symbol) operand of the add-immediate; this is what
    // ends up as the memory operation's displacement.
    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
    int MaxDisplacement = 7;
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      // For a global, the bound is further limited to its alignment minus one.
      MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
    }

    // UpdateHBase records that the node feeding the add-immediate (HBase)
    // must receive the updated immediate as well.
    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    // The displacement currently encoded on the memory operation.
    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
        // one use, then we can do this for any offset, we just need to also
        // update the offset (i.e. the symbol addend) on the addis also.
        if (Base.getMachineOpcode() != PPC::ADDItocL)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        // Both halves of the pair must refer to the very same operand.
        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        if (RequiresMod4Offset && (Offset % 4) != 0)
          continue;

        // The combined displacement must fit in a signed 16-bit field.
        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        continue;
      }
    }

    // We found an opportunity.  Reverse the operands from the add
    // immediate and substitute them into the load or store.  If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
    LLVM_DEBUG(Base->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nN: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (GV->getAlignment() < 4 &&
            (RequiresMod4Offset || (Offset % 4) != 0)) {
          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        // Rebuild the symbol operand with the memory operation's offset as
        // its addend and with the relocation flags chosen above.
        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
      } else if (ConstantPoolSDNode *CP =
                 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
                                                CP->getAlignment(),
                                                Offset, Flags);
      }
    }

    // Rewrite N in place: its displacement becomes ImmOpnd and its base
    // becomes the add-immediate's own base operand. The remaining operands
    // are passed through unchanged.
    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
                                       Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
                                       N->getOperand(2));

    // Give the paired ADDIStocHA the same immediate operand that was just
    // placed on the memory operation.
    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
                                       ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}
6543 
6544 /// createPPCISelDag - This pass converts a legalized DAG into a
6545 /// PowerPC-specific DAG, ready for instruction scheduling.
6546 ///
6547 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
6548                                      CodeGenOpt::Level OptLevel) {
6549   return new PPCDAGToDAGISel(TM, OptLevel);
6550 }
6551