//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "ppc-codegen"

// Pass statistics (reported with -stats); these count the work done by the
// GPR-based integer-compare expansion in this file.
STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");

// FIXME: Remove this once the bug has been fixed!
// Exposed (non-static) so that other PPC files can check this workaround flag.
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"),
                          cl::Hidden);

// Gates BitPermutationSelector (see tryBitPermutation below); on by default.
static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);

// Testing aid: forces the bit-permutation rewriter to prefer rotates.
static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

// Gates getBranchHint's static branch-probability hinting; on by default.
static cl::opt<bool> EnableBranchHint(
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

// Selects which integer-compare shapes the GPR-only expansion is allowed to
// handle (see tryIntCompareInGPR); used as the value type of CmpInGPR below.
enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
                     ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
                     ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

static cl::opt<ICmpInGPRType> CmpInGPR(
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));

namespace {

//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
  const PPCTargetMachine &TM;
  // Cached per-function in runOnMachineFunction(): the current subtarget and
  // its target-lowering object.
  const PPCSubtarget *PPCSubTarget;
  const PPCTargetLowering *PPCLowering;
  // Register holding the PIC/GOT base; 0 until materialized lazily by
  // getGlobalBaseReg() (reset at the start of each function).
  unsigned GlobalBaseReg;

public:
  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = PPCSubTarget->getTargetLowering();
    SelectionDAGISel::runOnMachineFunction(MF);

    // VRSAVE handling is only needed on non-SVR4 ABIs (e.g. Darwin).
    if (!PPCSubTarget->isSVR4ABI())
      InsertVRSaveCode(MF);

    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register.  Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  /// selectFrameIndex - Replace SN with an ADDI/ADDI8 computing the frame
  /// index held by N plus Offset.
  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  // Specialized selection attempts; each returns true if it replaced N.
  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl);

  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
  }

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field.  Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }

    return false;
  }

  /// SelectAddrIdx - Given the specified addressed, check to see if it can be
  /// represented as an indexed [r+r] operation.  Returns false if it can
  /// be represented by [r+imm], which are preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
  }

  /// SelectAddrIdxOnly - Given the specified addressed, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of 4.
  /// Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
  }

  /// SelectAddrImmX16 - As above, but the displacement must be a multiple of
  /// 16 (e.g. for the DQ-form memory instructions).
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
  }

  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.  It is always correct to compute the value into
  /// a register.  The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    case InlineAsm::Constraint_es:
    case InlineAsm::Constraint_i:
    case InlineAsm::Constraint_m:
    case InlineAsm::Constraint_o:
    case InlineAsm::Constraint_Q:
    case InlineAsm::Constraint_Z:
    case InlineAsm::Constraint_Zy:
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);

      OutOps.push_back(NewOp);
      return false;
    }
    // NOTE(review): unreachable — every switch path above returns or aborts.
    return true;
  }

  void InsertVRSaveCode(MachineFunction &MF);

  StringRef getPassName() const override {
    return "PowerPC DAG->DAG Pattern Instruction Selection";
  }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  bool trySETCC(SDNode *N);

  // Post-selection peepholes run from PostprocessISelDAG().
  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  void transferMemOperands(SDNode *N, SDNode *Result);
};

} // end anonymous namespace

/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE.  If so, do it.
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
  // Check to see if this function uses vector registers, which means we have to
  // save and restore the VRSAVE register and update it with the regs we use.
  //
  // In this case, there will be virtual registers of vector type created
  // by the scheduler.  Detect them now.
  bool HasVectorVReg = false;
  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
      HasVectorVReg = true;
      break;
    }
  }
  if (!HasVectorVReg) return;  // nothing to do.

  // If we have a vector register, we want to emit code into the entry and exit
  // blocks to save and restore the VRSAVE register.  We do this here (instead
  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
  //
  // 1. This (trivially) reduces the load on the register allocator, by not
  //    having to represent the live range of the VRSAVE register.
  // 2. This (more significantly) allows us to create a temporary virtual
  //    register to hold the saved VRSAVE value, allowing this temporary to be
  //    register allocated, instead of forcing it to be spilled to the stack.

  // Create two vregs - one to hold the VRSAVE register that is live-in to the
  // function and one for the value after having bits or'd into it.
  unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
  unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);

  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
  MachineBasicBlock &EntryBB = *Fn.begin();
  DebugLoc dl;
  // Emit the following code into the entry block:
  // InVRSAVE = MFVRSAVE
  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
  // MTVRSAVE UpdatedVRSAVE
  MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
          UpdatedVRSAVE).addReg(InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);

  // Find all return blocks, outputting a restore in each epilog.
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    if (BB->isReturnBlock()) {
      IP = BB->end(); --IP;

      // Skip over all terminator instructions, which are part of the return
      // sequence.
      MachineBasicBlock::iterator I2 = IP;
      while (I2 != BB->begin() && (--I2)->isTerminator())
        IP = I2;

      // Emit: MTVRSAVE InVRSave
      BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
    }
  }
}

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  // Materialize the base register lazily, at most once per function
  // (GlobalBaseReg is reset to 0 in runOnMachineFunction).
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      if (PPCSubTarget->isTargetELF()) {
        // 32-bit ELF PIC uses the fixed register R30 as the base.
        GlobalBaseReg = PPC::R30;
        if (M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
              .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        // Non-ELF 32-bit: use a fresh vreg from a class that excludes R0
        // (R0 reads as zero in addressing contexts).
        GlobalBaseReg =
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // 64-bit: same scheme with the 8-byte opcodes and an X0-excluding class.
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if a constant operand.
// If so Imm will receive the 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}

/// getBranchHint - Compute a static taken/not-taken hint (PPC::BR_TAKEN_HINT /
/// PPC::BR_NONTAKEN_HINT / PPC::BR_NO_HINT) for a branch to DestMBB, based on
/// the IR-level edge probabilities from BranchProbabilityInfo.
static unsigned getBranchHint(unsigned PCC, FunctionLoweringInfo *FuncInfo,
                              const SDValue &DestMBB) {
  assert(isa<BasicBlockSDNode>(DestMBB));

  // No probability info available for this function: emit no hint.
  if (!FuncInfo->BPI) return PPC::BR_NO_HINT;

  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const TerminatorInst *BBTerm = BB->getTerminator();

  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

  const BasicBlock *TBB = BBTerm->getSuccessor(0);
  const BasicBlock *FBB = BBTerm->getSuccessor(1);

  auto TProb = FuncInfo->BPI->getEdgeProbability(BB, TBB);
  auto FProb = FuncInfo->BPI->getEdgeProbability(BB, FBB);

  // We only want to handle cases which are easy to predict at static time, e.g.
  // C++ throw statement, that is very likely not taken, or calling never
  // returned function, e.g. stdlib exit(). So we set Threshold to filter
  // unwanted cases.
  //
  // Below is LLVM branch weight table, we only want to handle case 1, 2
  //
  // Case                  Taken:Nontaken  Example
  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
  // 2. Invoke-terminating 1:1048575
  // 3. Coldblock          4:64            __builtin_expect
  // 4. Loop Branch        124:4           For loop
  // 5. PH/ZH/FPH          20:12
  const uint32_t Threshold = 10000;

  // Probabilities closer than Threshold:1 are not extreme enough to hint.
  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    return PPC::BR_NO_HINT;

  DEBUG(dbgs() << "Use branch hint for '" << FuncInfo->Fn->getName() << "::"
               << BB->getName() << "'\n"
               << " -> " << TBB->getName() << ": " << TProb << "\n"
               << " -> " << FBB->getName() << ": " << FProb << "\n");

  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    std::swap(TProb, FProb);

  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has a immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc
         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
  SDLoc dl(SN);
  int FI = cast<FrameIndexSDNode>(N)->getIndex();
  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
  // Mutate in place when SN has a single use; otherwise create a new node
  // and replace all uses of SN with it.
  if (SN->hasOneUse())
    CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
                         getSmallIPtrImm(Offset, dl));
  else
    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
                                           getSmallIPtrImm(Offset, dl)));
}

bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
                                      bool isShiftMask, unsigned &SH,
                                      unsigned &MB, unsigned &ME) {
  // Don't even go down this path for i64, since different logic will be
  // necessary for rldicl/rldicr/rldimi.
  if (N->getValueType(0) != MVT::i32)
    return false;

  unsigned Shift  = 32;
  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
  unsigned Opcode = N->getOpcode();
  if (N->getNumOperands() != 2 ||
      !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
    return false;

  if (Opcode == ISD::SHL) {
    // apply shift left to mask if it comes first
    if (isShiftMask) Mask = Mask << Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu << Shift);
  } else if (Opcode == ISD::SRL) {
    // apply shift right to mask if it comes first
    if (isShiftMask) Mask = Mask >> Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu >> Shift);
    // adjust for the left rotate
    Shift = 32 - Shift;
  } else if (Opcode == ISD::ROTL) {
    // A rotate never produces indeterminant bits.
    Indeterminant = 0;
  } else {
    return false;
  }

  // if the mask doesn't intersect any Indeterminant bits
  if (Mask && !(Mask & Indeterminant)) {
    SH = Shift & 31;
    // make sure the mask is still a mask (wrap arounds may not be)
    return isRunOfOnes(Mask, MB, ME);
  }
  return false;
}

/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc dl(N);

  KnownBits LKnown, RKnown;
  CurDAG->computeKnownBits(Op0, LKnown);
  CurDAG->computeKnownBits(Op1, RKnown);

  unsigned TargetMask = LKnown.Zero.getZExtValue();
  unsigned InsertMask = RKnown.Zero.getZExtValue();

  // Only an rlwimi candidate if, together, the two sides cover all 32 bits.
  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    // Flip from known-zero masks to known-set masks.
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to the
    // RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    // The inserted bits must form one contiguous run for rlwimi's MB/ME fields.
    if (isRunOfOnes(InsertMask, MB, ME)) {
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        // SRL by Value is a left-rotate by 32 - Value.
        SH  = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      if (Op1Opc == ISD::AND) {
        // The AND mask might not be a constant, and we need to make sure that
        // if we're going to fold the masking with the insert, all bits not
        // know to be zero in the mask are known to be one.
        KnownBits MKnown;
        CurDAG->computeKnownBits(Op1.getOperand(1), MKnown);
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
          // Note that Value must be in range here (less than 32) because
          // otherwise there would not be any bits set in InsertMask.
          Op1 = Op1.getOperand(0).getOperand(0);
          SH  = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        }
      }

      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                          getI32Imm(ME, dl) };
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
      return true;
    }
  }
  return false;
}

// Predict the number of instructions that would be generated by calling
// selectI64Imm(N).
static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
  // NOTE: this cost model must be kept in sync with selectI64ImmDirect below;
  // it mirrors that function's structure step for step.
  // Assume no remaining bits.
  unsigned Remainder = 0;
  // Assume no shift required.
  unsigned Shift = 0;

  // If it can't be represented as a 32 bit value.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    // If the shifted value fits 32 bits.
    if (isInt<32>(ImmSh)) {
      // Go with the shifted value.
      Imm = ImmSh;
    } else {
      // Still stuck with a 64 bit value.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Intermediate operand.
  unsigned Result = 0;

  // Handle first 32 bits.
  unsigned Lo = Imm & 0xFFFF;

  // Simple value.
  if (isInt<16>(Imm)) {
    // Just the Lo bits.
    ++Result;
  } else if (Lo) {
    // Handle the Hi bits and Lo bits.
    Result += 2;
  } else {
    // Just the Hi bits.
    ++Result;
  }

  // If no shift, we're done.
  if (!Shift) return Result;

  // If Hi word == Lo word,
  // we can use rldimi to insert the Lo word into Hi word.
  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
    ++Result;
    return Result;
  }

  // Shift for next step if the upper 32-bits were not zero.
  if (Imm)
    ++Result;

  // Add in the last bits as required.
  if ((Remainder >> 16) & 0xFFFF)
    ++Result;
  if (Remainder & 0xFFFF)
    ++Result;

  return Result;
}

// Rotate a 64-bit value left by R bits (callers pass R in [1, 62]).
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  return (Imm << R) | (Imm >> (64 - R));
}

static unsigned selectI64ImmInstrCount(int64_t Imm) {
  unsigned Count = selectI64ImmInstrCountDirect(Imm);

  // If the instruction count is 1 or 2, we do not need further analysis
  // since rotate + load constant requires at least 2 instructions.
  if (Count <= 2)
    return Count;

  for (unsigned r = 1; r < 63; ++r) {
    uint64_t RImm = Rot64(Imm, r);
    unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
    Count = std::min(Count, RCount);

    // See comments in selectI64Imm for an explanation of the logic below.
    unsigned LS = findLastSet(RImm);
    if (LS != r-1)
      continue;

    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
    uint64_t RImmWithOnes = RImm | OnesMask;

    RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
    Count = std::min(Count, RCount);
  }

  return Count;
}

// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
// (above) needs to be kept in sync with this function.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  int64_t Imm) {
  // Assume no remaining bits.
  unsigned Remainder = 0;
  // Assume no shift required.
  unsigned Shift = 0;

  // If it can't be represented as a 32 bit value.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    // If the shifted value fits 32 bits.
    if (isInt<32>(ImmSh)) {
      // Go with the shifted value.
      Imm = ImmSh;
    } else {
      // Still stuck with a 64 bit value.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Intermediate operand.
  SDNode *Result;

  // Handle first 32 bits.
  unsigned Lo = Imm & 0xFFFF;
  unsigned Hi = (Imm >> 16) & 0xFFFF;

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Simple value.
  if (isInt<16>(Imm)) {
    uint64_t SextImm = SignExtend64(Lo, 16);
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
    // Just the Lo bits.
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  } else if (Lo) {
    // Handle the Hi bits.
    unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
    // And Lo bits.
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Lo));
  } else {
    // Just the Hi bits.
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
  }

  // If no shift, we're done.
  if (!Shift) return Result;

  // If Hi word == Lo word,
  // we can use rldimi to insert the Lo word into Hi word.
  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
    SDValue Ops[] =
      { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }

  // Shift for next step if the upper 32-bits were not zero.
  if (Imm) {
    Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
                                    SDValue(Result, 0),
                                    getI32Imm(Shift),
                                    getI32Imm(63 - Shift));
  }

  // Add in the last bits as required.
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Hi));
  }
  if ((Lo = Remainder & 0xFFFF)) {
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Lo));
  }

  return Result;
}

// As selectI64ImmDirect, but first searches for a cheaper rotated form:
// materialize a rotation of Imm and undo it with a single RLDICR.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
                            int64_t Imm) {
  unsigned Count = selectI64ImmInstrCountDirect(Imm);

  // If the instruction count is 1 or 2, we do not need further analysis
  // since rotate + load constant requires at least 2 instructions.
  if (Count <= 2)
    return selectI64ImmDirect(CurDAG, dl, Imm);

  unsigned RMin = 0;

  int64_t MatImm;
  unsigned MaskEnd;

  for (unsigned r = 1; r < 63; ++r) {
    uint64_t RImm = Rot64(Imm, r);
    unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
    if (RCount < Count) {
      Count = RCount;
      RMin = r;
      MatImm = RImm;
      MaskEnd = 63;
    }

    // If the immediate to generate has many trailing zeros, it might be
    // worthwhile to generate a rotated value with too many leading ones
    // (because that's free with li/lis's sign-extension semantics), and then
    // mask them off after rotation.

    unsigned LS = findLastSet(RImm);
    // We're adding (63-LS) higher-order ones, and we expect to mask them off
    // after performing the inverse rotation by (64-r).
So we need that: 869 // 63-LS == 64-r => LS == r-1 870 if (LS != r-1) 871 continue; 872 873 uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1)); 874 uint64_t RImmWithOnes = RImm | OnesMask; 875 876 RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1; 877 if (RCount < Count) { 878 Count = RCount; 879 RMin = r; 880 MatImm = RImmWithOnes; 881 MaskEnd = LS; 882 } 883 } 884 885 if (!RMin) 886 return selectI64ImmDirect(CurDAG, dl, Imm); 887 888 auto getI32Imm = [CurDAG, dl](unsigned Imm) { 889 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 890 }; 891 892 SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0); 893 return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val, 894 getI32Imm(64 - RMin), getI32Imm(MaskEnd)); 895 } 896 897 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) { 898 unsigned MaxTruncation = 0; 899 // Cannot use range-based for loop here as we need the actual use (i.e. we 900 // need the operand number corresponding to the use). A range-based for 901 // will unbox the use and provide an SDNode*. 902 for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end(); 903 Use != UseEnd; ++Use) { 904 unsigned Opc = 905 Use->isMachineOpcode() ? 
Use->getMachineOpcode() : Use->getOpcode(); 906 switch (Opc) { 907 default: return 0; 908 case ISD::TRUNCATE: 909 if (Use->isMachineOpcode()) 910 return 0; 911 MaxTruncation = 912 std::max(MaxTruncation, Use->getValueType(0).getSizeInBits()); 913 continue; 914 case ISD::STORE: { 915 if (Use->isMachineOpcode()) 916 return 0; 917 StoreSDNode *STN = cast<StoreSDNode>(*Use); 918 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits(); 919 if (MemVTSize == 64 || Use.getOperandNo() != 0) 920 return 0; 921 MaxTruncation = std::max(MaxTruncation, MemVTSize); 922 continue; 923 } 924 case PPC::STW8: 925 case PPC::STWX8: 926 case PPC::STWU8: 927 case PPC::STWUX8: 928 if (Use.getOperandNo() != 0) 929 return 0; 930 MaxTruncation = std::max(MaxTruncation, 32u); 931 continue; 932 case PPC::STH8: 933 case PPC::STHX8: 934 case PPC::STHU8: 935 case PPC::STHUX8: 936 if (Use.getOperandNo() != 0) 937 return 0; 938 MaxTruncation = std::max(MaxTruncation, 16u); 939 continue; 940 case PPC::STB8: 941 case PPC::STBX8: 942 case PPC::STBU8: 943 case PPC::STBUX8: 944 if (Use.getOperandNo() != 0) 945 return 0; 946 MaxTruncation = std::max(MaxTruncation, 8u); 947 continue; 948 } 949 } 950 return MaxTruncation; 951 } 952 953 // Select a 64-bit constant. 954 static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) { 955 SDLoc dl(N); 956 957 // Get 64 bit value. 958 int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue(); 959 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) { 960 uint64_t SextImm = SignExtend64(Imm, MinSize); 961 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64); 962 if (isInt<16>(SextImm)) 963 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm); 964 } 965 return selectI64Imm(CurDAG, dl, Imm); 966 } 967 968 namespace { 969 970 class BitPermutationSelector { 971 struct ValueBit { 972 SDValue V; 973 974 // The bit number in the value, using a convention where bit 0 is the 975 // lowest-order bit. 
    unsigned Idx;

    enum Kind {
      ConstZero,
      Variable
    } K;

    ValueBit(SDValue V, unsigned I, Kind K = Variable)
      : V(V), Idx(I), K(K) {}
    // Constant bits carry no value; Idx is set to UINT32_MAX as a sentinel.
    ValueBit(Kind K = Variable)
      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}

    bool isZero() const {
      return K == ConstZero;
    }

    bool hasValue() const {
      return K == Variable;
    }

    SDValue getValue() const {
      assert(hasValue() && "Cannot get the value of a constant bit");
      return V;
    }

    unsigned getValueBitIndex() const {
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
      return Idx;
    }
  };

  // A bit group has the same underlying value and the same rotate factor.
  struct BitGroup {
    SDValue V;
    unsigned RLAmt;
    unsigned StartIdx, EndIdx;

    // If true, RLAmt assumes that the lower 32 bits of the quantity are
    // replicated in the high 32 bits by the rotation operator (which is done
    // by rlwinm and friends in 64-bit mode).
    bool Repl32;
    // Did converting to Repl32 == true change the rotation factor? If it did,
    // it decreased it by 32.
    bool Repl32CR;
    // Was this group coalesced after setting Repl32 to true?
    bool Repl32Coalesced;

    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
        Repl32Coalesced(false) {
      DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R <<
                      " [" << S << ", " << E << "]\n");
    }
  };

  // Information on each (Value, RLAmt) pair (like the number of groups
  // associated with each) used to choose the lowering method.
  struct ValueRotInfo {
    SDValue V;
    unsigned RLAmt = std::numeric_limits<unsigned>::max();
    unsigned NumGroups = 0;
    unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
    bool Repl32 = false;

    ValueRotInfo() = default;

    // For sorting (in reverse order) by NumGroups, and then by
    // FirstGroupStartIdx. This is a strict weak ordering suitable for
    // std::sort.
    bool operator < (const ValueRotInfo &Other) const {
      // We need to sort so that the non-Repl32 come first because, when we're
      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
      // masking operation.
      if (Repl32 < Other.Repl32)
        return true;
      else if (Repl32 > Other.Repl32)
        return false;
      else if (NumGroups > Other.NumGroups)
        return true;
      else if (NumGroups < Other.NumGroups)
        return false;
      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
        return true;
      return false;
    }
  };

  // Memoization cache for getValueBits: maps a value to (Interesting, Bits).
  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  using ValueBitsMemoizer =
      DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  ValueBitsMemoizer Memoizer;

  // Return a pair of bool and a SmallVector pointer to a memoization entry.
  // The bool is true if something interesting was deduced, otherwise if we're
  // providing only a generic representation of V (or something else likewise
  // uninteresting for instruction selection) through the SmallVector.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    // Recursive, memoized analysis: results live in (and are returned from)
    // the Memoizer so repeated queries for the same value are free.
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    ValueEntry.reset(new ValueBitsMemoizedValue());
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    // NOTE(review): the SHL/SRL/ROTL cases below assume the constant shift or
    // rotate amount is < NumBits; an out-of-range amount would make the loop
    // bounds underflow. Confirm that legalization guarantees this here.
    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        // The vacated low-order bits are known zero.
        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        // The vacated high-order bits are known zero.
        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);

        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      // An OR is only a bit permutation when, for every position, at least
      // one side is known zero (i.e. the operands are disjoint).
      bool AllDisjoint = true;
      for (unsigned i = 0; i < NumBits; ++i)
        if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          AllDisjoint = false;
          break;
        }

      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    }

    // Generic (uninteresting) fallback: each result bit is just the
    // corresponding bit of V.
    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }

  // For each value (except the constant ones), compute the left-rotate amount
  // to get it from its original to final position.
  void computeRotationAmounts() {
    HasZeros = false;
    RLAmt.resize(Bits.size());
    for (unsigned i = 0; i < Bits.size(); ++i)
      if (Bits[i].hasValue()) {
        unsigned VBI = Bits[i].getValueBitIndex();
        if (i >= VBI)
          RLAmt[i] = i - VBI;
        else
          RLAmt[i] = Bits.size() - (VBI - i);
      } else if (Bits[i].isZero()) {
        HasZeros = true;
        // Sentinel rotation amount for constant-zero bits.
        RLAmt[i] = UINT32_MAX;
      } else {
        llvm_unreachable("Unknown value bit type");
      }
  }

  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        // Zeros will be masked off later, so fold them into the current
        // group.
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit has the same underlying value and the same rotate factor as
      // the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        continue;

      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
    }
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }

  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
  // associated with each. If there is a degeneracy, pick the one that occurs
  // first (in the final value).
  void collectValueRotInfo() {
    ValueRots.clear();

    for (auto &BG : BitGroups) {
      // Repl32 groups are keyed separately from non-Repl32 groups with the
      // same rotation amount (RLAmt is always < 64).
      unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
      ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
      VRI.V = BG.V;
      VRI.RLAmt = BG.RLAmt;
      VRI.Repl32 = BG.Repl32;
      VRI.NumGroups += 1;
      VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
    }

    // Now that we've collected the various ValueRotInfo instances, we need to
    // sort them.
    ValueRotsVec.clear();
    for (auto &I : ValueRots) {
      ValueRotsVec.push_back(I.second);
    }
    std::sort(ValueRotsVec.begin(), ValueRotsVec.end());
  }

  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible (updating BitGroups in place).
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
    // V bits: ...  7  6  5  4  3  2 1 0 31 30 29 28 27 26 25 24
    // Groups:          |      RLAmt = 8      |      RLAmt = 40     |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // True iff every value bit in the (possibly wrapping) group comes from
    // the low 32 bits of its source value.
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        // Wrapping group: check [StartIdx, end) and [0, EndIdx].
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    for (auto &BG : BitGroups) {
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          DEBUG(dbgs() << "\t32-bit replicated bit group for " <<
                          BG.V.getNode() << " RLAmt = " << BG.RLAmt <<
                          " [" << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " <<
                        I->V.getNode() << " RLAmt = " << I->RLAmt <<
                        " [" << I->StartIdx << ", " << I->EndIdx <<
                        "] with group with range [" <<
                        IP->StartIdx << ", " << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            DEBUG(dbgs() << "\tcombining bit group for " <<
                            I->V.getNode() << " RLAmt = " << I->RLAmt <<
                            " [" << I->StartIdx << ", " << I->EndIdx <<
                            "] with 32-bit replicated groups with ranges [" <<
                            IP->StartIdx << ", " << IP->EndIdx << "] and [" <<
                            IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }

  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  // Return a mask with a 1 in every position holding a variable value bit and
  // a 0 in every constant-zero position (so ANDing with it clears the zeros).
  uint64_t getZerosMask() {
    uint64_t Mask = 0;
    for (unsigned i = 0; i < Bits.size(); ++i) {
      if (Bits[i].hasValue())
        continue;
      Mask |= (UINT64_C(1) << i);
    }

    return ~Mask;
  }

  // This method extends an input value to 64 bit if input is 32-bit integer.
  // While selecting instructions in BitPermutationSelector in 64-bit mode,
  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
  // In such case, we extend it to 64 bit to be consistent with other values.
  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
    if (V.getValueSizeInBits() == 64)
      return V;

    assert(V.getValueSizeInBits() == 32);
    // Insert the 32-bit value into the low subregister of an undefined
    // 64-bit register (IMPLICIT_DEF + INSERT_SUBREG).
    SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
                                                   MVT::i64), 0);
    SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
                                                    MVT::i64, ImDef, V,
                                                    SubRegIdx), 0);
    return ExtVal;
  }

  // Depending on the number of groups for a particular value, it might be
  // better to rotate, mask explicitly (using andi/andis), and then or the
  // result. Select this part of the result first.
  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    if (BPermRewriterNoMasking)
      return;

    for (ValueRotInfo &VRI : ValueRotsVec) {
      // Build the 32-bit mask of all bit positions fed by (VRI.V, VRI.RLAmt).
      unsigned Mask = 0;
      for (unsigned i = 0; i < Bits.size(); ++i) {
        if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
          continue;
        if (RLAmt[i] != VRI.RLAmt)
          continue;
        Mask |= (1u << i);
      }

      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in mask for value bit groups");
      bool NeedsRotate = VRI.RLAmt != 0;

      // We're trying to minimize the number of instructions. If we have one
      // group, using one of andi/andis can break even. If we have three
      // groups, we can use both andi and andis and break even (to use both
      // andi and andis we also need to or the results together). We need four
      // groups if we also need to rotate. To use andi/andis we need to do more
      // than break even because rotate-and-mask instructions tend to be easier
      // to schedule.

      // FIXME: We've biased here against using andi/andis, which is right for
      // POWER cores, but not optimal everywhere. For example, on the A2,
      // andi/andis have single-cycle latency whereas the rotate-and-mask
      // instructions take two cycles, and it would be better to bias toward
      // andi/andis in break-even cases.

      unsigned NumAndInsts = (unsigned) NeedsRotate +
                             (unsigned) (ANDIMask != 0) +
                             (unsigned) (ANDISMask != 0) +
                             (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
                             (unsigned) (bool) Res;

      DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() <<
                      " RL: " << VRI.RLAmt << ":" <<
                      "\n\t\t\tisel using masking: " << NumAndInsts <<
                      " using rotates: " << VRI.NumGroups << "\n");

      if (NumAndInsts >= VRI.NumGroups)
        continue;

      DEBUG(dbgs() << "\t\t\t\tusing masking\n");

      if (InstCnt) *InstCnt += NumAndInsts;

      SDValue VRot;
      if (VRI.RLAmt) {
        SDValue Ops[] =
          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
            getI32Imm(31, dl) };
        VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                              Ops), 0);
      } else {
        VRot = VRI.V;
      }

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
                            VRot, getI32Imm(ANDIMask, dl)), 0);
      if (ANDISMask != 0)
        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
                             VRot, getI32Imm(ANDISMask, dl)), 0);

      SDValue TotalVal;
      if (!ANDIVal)
        TotalVal = ANDISVal;
      else if (!ANDISVal)
        TotalVal = ANDIVal;
      else
        TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                                  ANDIVal, ANDISVal), 0);

      if (!Res)
        Res = TotalVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                             Res, TotalVal), 0);

      // Now, remove all groups with this underlying value and rotation
      // factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }
  }

  // Instruction selection for the 32-bit case.
  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts32(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!HasZeros || LateMask) && !Res) {
      ValueRotInfo &VRI = ValueRotsVec[0];
      if (VRI.RLAmt) {
        if (InstCnt) *InstCnt += 1;
        SDValue Ops[] =
          { VRI.V, getI32Imm(VRI.RLAmt, dl), getI32Imm(0, dl),
            getI32Imm(31, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
      } else {
        Res = VRI.V;
      }

      // Now, remove all groups with this underlying value and rotation factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }

    if (InstCnt) *InstCnt += BitGroups.size();

    // Insert the other groups (one at a time).
    // The instructions' mask indices count from the most-significant bit,
    // hence the Bits.size() - Idx - 1 conversions below.
    for (auto &BG : BitGroups) {
      if (!Res) {
        SDValue Ops[] =
          { BG.V, getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
      } else {
        SDValue Ops[] =
          { Res, BG.V, getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
      }
    }

    if (LateMask) {
      // Clear out the constant-zero positions that were folded into groups
      // during collectBitGroups(true).
      unsigned Mask = (unsigned) getZerosMask();

      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in zeros mask?");

      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                               (unsigned) (ANDISMask != 0) +
                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo, dl, MVT::i32,
                            Res, getI32Imm(ANDIMask, dl)), 0);
      if (ANDISMask != 0)
        ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo, dl, MVT::i32,
                             Res, getI32Imm(ANDISMask, dl)), 0);

      if (!ANDIVal)
        Res = ANDISVal;
      else if (!ANDISVal)
        Res = ANDIVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                             ANDIVal, ANDISVal), 0);
    }

    return Res.getNode();
  }

  // Return the number of instructions SelectRotMask64 (or, for IsIns,
  // SelectRotMaskIns64) will need for this rotate-and-mask: 1 when a single
  // rotate-and-mask instruction can encode it, otherwise 2.
  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                                unsigned MaskStart, unsigned MaskEnd,
                                bool IsIns) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (Repl32)
      return 1;

    if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
        InstMaskEnd == 63 - RLAmt)
      return 1;

    return 2;
  }

  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce one if it is available.
  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                          unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                            Ops), 0);
    }

    if (InstMaskEnd == 63) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
    }

    if (InstMaskStart == 0) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskEnd, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
    }

    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }

  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce a rotate-mask-and-insert if one is available.
  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }

  // 64-bit analogue of SelectAndParts32: select the parts of the result that
  // are better handled by rotate + explicit masking.
  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    if (BPermRewriterNoMasking)
      return;

    // The idea here is the same as in the 32-bit version, but with additional
    // complications from the fact that Repl32 might be true. Because we
    // aggressively convert bit groups to Repl32 form (which, for small
    // rotation factors, involves no other change), and then coalesce, it might
    // be the case that a single 64-bit masking operation could handle both
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
    // form allowed coalescing, then we must use a 32-bit rotation in order to
    // completely capture the new combined bit group.

    for (ValueRotInfo &VRI : ValueRotsVec) {
      uint64_t Mask = 0;

      // We need to add to the mask all bits from the associated bit groups.
      // If Repl32 is false, we need to add bits from bit groups that have
      // Repl32 true, but are trivially convertable to Repl32 false. Such a
      // group is trivially convertable if it overlaps only with the lower 32
      // bits, and the group has not been coalesced.
1795 auto MatchingBG = [VRI](const BitGroup &BG) { 1796 if (VRI.V != BG.V) 1797 return false; 1798 1799 unsigned EffRLAmt = BG.RLAmt; 1800 if (!VRI.Repl32 && BG.Repl32) { 1801 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && 1802 !BG.Repl32Coalesced) { 1803 if (BG.Repl32CR) 1804 EffRLAmt += 32; 1805 } else { 1806 return false; 1807 } 1808 } else if (VRI.Repl32 != BG.Repl32) { 1809 return false; 1810 } 1811 1812 return VRI.RLAmt == EffRLAmt; 1813 }; 1814 1815 for (auto &BG : BitGroups) { 1816 if (!MatchingBG(BG)) 1817 continue; 1818 1819 if (BG.StartIdx <= BG.EndIdx) { 1820 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) 1821 Mask |= (UINT64_C(1) << i); 1822 } else { 1823 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) 1824 Mask |= (UINT64_C(1) << i); 1825 for (unsigned i = 0; i <= BG.EndIdx; ++i) 1826 Mask |= (UINT64_C(1) << i); 1827 } 1828 } 1829 1830 // We can use the 32-bit andi/andis technique if the mask does not 1831 // require any higher-order bits. This can save an instruction compared 1832 // to always using the general 64-bit technique. 1833 bool Use32BitInsts = isUInt<32>(Mask); 1834 // Compute the masks for andi/andis that would be necessary. 
1835 unsigned ANDIMask = (Mask & UINT16_MAX), 1836 ANDISMask = (Mask >> 16) & UINT16_MAX; 1837 1838 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); 1839 1840 unsigned NumAndInsts = (unsigned) NeedsRotate + 1841 (unsigned) (bool) Res; 1842 if (Use32BitInsts) 1843 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + 1844 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1845 else 1846 NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1; 1847 1848 unsigned NumRLInsts = 0; 1849 bool FirstBG = true; 1850 bool MoreBG = false; 1851 for (auto &BG : BitGroups) { 1852 if (!MatchingBG(BG)) { 1853 MoreBG = true; 1854 continue; 1855 } 1856 NumRLInsts += 1857 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, 1858 !FirstBG); 1859 FirstBG = false; 1860 } 1861 1862 DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() << 1863 " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") << 1864 "\n\t\t\tisel using masking: " << NumAndInsts << 1865 " using rotates: " << NumRLInsts << "\n"); 1866 1867 // When we'd use andi/andis, we bias toward using the rotates (andi only 1868 // has a record form, and is cracked on POWER cores). However, when using 1869 // general 64-bit constant formation, bias toward the constant form, 1870 // because that exposes more opportunities for CSE. 1871 if (NumAndInsts > NumRLInsts) 1872 continue; 1873 // When merging multiple bit groups, instruction or is used. 1874 // But when rotate is used, rldimi can inert the rotated value into any 1875 // register, so instruction or can be avoided. 1876 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) 1877 continue; 1878 1879 DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 1880 1881 if (InstCnt) *InstCnt += NumAndInsts; 1882 1883 SDValue VRot; 1884 // We actually need to generate a rotation if we have a non-zero rotation 1885 // factor or, in the Repl32 case, if we care about any of the 1886 // higher-order replicated bits. 
In the latter case, we generate a mask 1887 // backward so that it actually includes the entire 64 bits. 1888 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) 1889 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 1890 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63); 1891 else 1892 VRot = VRI.V; 1893 1894 SDValue TotalVal; 1895 if (Use32BitInsts) { 1896 assert((ANDIMask != 0 || ANDISMask != 0) && 1897 "No set bits in mask when using 32-bit ands for 64-bit value"); 1898 1899 SDValue ANDIVal, ANDISVal; 1900 if (ANDIMask != 0) 1901 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, 1902 ExtendToInt64(VRot, dl), 1903 getI32Imm(ANDIMask, dl)), 1904 0); 1905 if (ANDISMask != 0) 1906 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, 1907 ExtendToInt64(VRot, dl), 1908 getI32Imm(ANDISMask, dl)), 1909 0); 1910 1911 if (!ANDIVal) 1912 TotalVal = ANDISVal; 1913 else if (!ANDISVal) 1914 TotalVal = ANDIVal; 1915 else 1916 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1917 ExtendToInt64(ANDIVal, dl), ANDISVal), 0); 1918 } else { 1919 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); 1920 TotalVal = 1921 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 1922 ExtendToInt64(VRot, dl), TotalVal), 1923 0); 1924 } 1925 1926 if (!Res) 1927 Res = TotalVal; 1928 else 1929 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 1930 ExtendToInt64(Res, dl), TotalVal), 1931 0); 1932 1933 // Now, remove all groups with this underlying value and rotation 1934 // factor. 1935 eraseMatchingBitGroups(MatchingBG); 1936 } 1937 } 1938 1939 // Instruction selection for the 64-bit case. 1940 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { 1941 SDLoc dl(N); 1942 SDValue Res; 1943 1944 if (InstCnt) *InstCnt = 0; 1945 1946 // Take care of cases that should use andi/andis first. 
1947 SelectAndParts64(dl, Res, InstCnt); 1948 1949 // If we've not yet selected a 'starting' instruction, and we have no zeros 1950 // to fill in, select the (Value, RLAmt) with the highest priority (largest 1951 // number of groups), and start with this rotated value. 1952 if ((!HasZeros || LateMask) && !Res) { 1953 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 1954 // groups will come first, and so the VRI representing the largest number 1955 // of groups might not be first (it might be the first Repl32 groups). 1956 unsigned MaxGroupsIdx = 0; 1957 if (!ValueRotsVec[0].Repl32) { 1958 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) 1959 if (ValueRotsVec[i].Repl32) { 1960 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) 1961 MaxGroupsIdx = i; 1962 break; 1963 } 1964 } 1965 1966 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; 1967 bool NeedsRotate = false; 1968 if (VRI.RLAmt) { 1969 NeedsRotate = true; 1970 } else if (VRI.Repl32) { 1971 for (auto &BG : BitGroups) { 1972 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || 1973 BG.Repl32 != VRI.Repl32) 1974 continue; 1975 1976 // We don't need a rotate if the bit group is confined to the lower 1977 // 32 bits. 1978 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) 1979 continue; 1980 1981 NeedsRotate = true; 1982 break; 1983 } 1984 } 1985 1986 if (NeedsRotate) 1987 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 1988 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, 1989 InstCnt); 1990 else 1991 Res = VRI.V; 1992 1993 // Now, remove all groups with this underlying value and rotation factor. 1994 if (Res) 1995 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1996 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && 1997 BG.Repl32 == VRI.Repl32; 1998 }); 1999 } 2000 2001 // Because 64-bit rotates are more flexible than inserts, we might have a 2002 // preference regarding which one we do first (to save one instruction). 
2003 if (!Res) 2004 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { 2005 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 2006 false) < 2007 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 2008 true)) { 2009 if (I != BitGroups.begin()) { 2010 BitGroup BG = *I; 2011 BitGroups.erase(I); 2012 BitGroups.insert(BitGroups.begin(), BG); 2013 } 2014 2015 break; 2016 } 2017 } 2018 2019 // Insert the other groups (one at a time). 2020 for (auto &BG : BitGroups) { 2021 if (!Res) 2022 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, 2023 BG.EndIdx, InstCnt); 2024 else 2025 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, 2026 BG.StartIdx, BG.EndIdx, InstCnt); 2027 } 2028 2029 if (LateMask) { 2030 uint64_t Mask = getZerosMask(); 2031 2032 // We can use the 32-bit andi/andis technique if the mask does not 2033 // require any higher-order bits. This can save an instruction compared 2034 // to always using the general 64-bit technique. 2035 bool Use32BitInsts = isUInt<32>(Mask); 2036 // Compute the masks for andi/andis that would be necessary. 
2037 unsigned ANDIMask = (Mask & UINT16_MAX), 2038 ANDISMask = (Mask >> 16) & UINT16_MAX; 2039 2040 if (Use32BitInsts) { 2041 assert((ANDIMask != 0 || ANDISMask != 0) && 2042 "No set bits in mask when using 32-bit ands for 64-bit value"); 2043 2044 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 2045 (unsigned) (ANDISMask != 0) + 2046 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 2047 2048 SDValue ANDIVal, ANDISVal; 2049 if (ANDIMask != 0) 2050 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDIo8, dl, MVT::i64, 2051 ExtendToInt64(Res, dl), getI32Imm(ANDIMask, dl)), 0); 2052 if (ANDISMask != 0) 2053 ANDISVal = SDValue(CurDAG->getMachineNode(PPC::ANDISo8, dl, MVT::i64, 2054 ExtendToInt64(Res, dl), getI32Imm(ANDISMask, dl)), 0); 2055 2056 if (!ANDIVal) 2057 Res = ANDISVal; 2058 else if (!ANDISVal) 2059 Res = ANDIVal; 2060 else 2061 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2062 ExtendToInt64(ANDIVal, dl), ANDISVal), 0); 2063 } else { 2064 if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; 2065 2066 SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); 2067 Res = 2068 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 2069 ExtendToInt64(Res, dl), MaskVal), 0); 2070 } 2071 } 2072 2073 return Res.getNode(); 2074 } 2075 2076 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { 2077 // Fill in BitGroups. 2078 collectBitGroups(LateMask); 2079 if (BitGroups.empty()) 2080 return nullptr; 2081 2082 // For 64-bit values, figure out when we can use 32-bit instructions. 2083 if (Bits.size() == 64) 2084 assignRepl32BitGroups(); 2085 2086 // Fill in ValueRotsVec. 
    collectValueRotInfo();

    if (Bits.size() == 32) {
      return Select32(N, LateMask, InstCnt);
    } else {
      assert(Bits.size() == 64 && "Not 64 bits here?");
      return Select64(N, LateMask, InstCnt);
    }

    // NOTE(review): unreachable — both branches above return.
    return nullptr;
  }

  // Remove every bit group for which the predicate F returns true.
  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
    BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
  }

  // The provenance of each bit of the value being matched.
  SmallVector<ValueBit, 64> Bits;

  // Whether any bit of the result is a constant zero.
  bool HasZeros;
  // Per-bit left-rotation amount relative to its source value.
  SmallVector<unsigned, 64> RLAmt;

  // Contiguous runs of bits sharing a source value and rotation amount.
  SmallVector<BitGroup, 16> BitGroups;

  // Aggregated (value, rotation) statistics used to prioritize selection.
  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
  SmallVector<ValueRotInfo, 16> ValueRotsVec;

  SelectionDAG *CurDAG;

public:
  BitPermutationSelector(SelectionDAG *DAG)
    : CurDAG(DAG) {}

  // Here we try to match complex bit permutations into a set of
  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  // known to produce optimal code for common cases (like i32 byte swapping).
  SDNode *Select(SDNode *N) {
    Memoizer.clear();
    auto Result =
      getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
    if (!Result.first)
      return nullptr;
    Bits = std::move(*Result.second);

    DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                    " selection for: ");
    DEBUG(N->dump(CurDAG));

    // Fill in RLAmt and set HasZeros.
    computeRotationAmounts();

    if (!HasZeros)
      return Select(N, false);

    // We currently have two techniques for handling results with zeros: early
    // masking (the default) and late masking. Late masking is sometimes more
    // efficient, but because the structure of the bit groups is different, it
    // is hard to tell without generating both and comparing the results. With
    // late masking, we ignore zeros in the resulting value when inserting each
    // set of bit groups, and then mask in the zeros at the end. With early
    // masking, we only insert the non-zero parts of the result at every step.

    unsigned InstCnt, InstCntLateMask;
    DEBUG(dbgs() << "\tEarly masking:\n");
    SDNode *RN = Select(N, false, &InstCnt);
    DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");

    DEBUG(dbgs() << "\tLate masking:\n");
    SDNode *RNLM = Select(N, true, &InstCntLateMask);
    DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask <<
          " instructions\n");

    // Prefer early masking on ties.
    if (InstCnt <= InstCntLateMask) {
      DEBUG(dbgs() << "\tUsing early-masking for isel\n");
      return RN;
    }

    DEBUG(dbgs() << "\tUsing late-masking for isel\n");
    return RNLM;
  }
};

// Expands (sext/zext (setcc ...)) and logical ops on i1 comparison results
// into GPR instruction sequences, avoiding CR-logical operations.
class IntegerCompareEliminator {
  SelectionDAG *CurDAG;
  PPCDAGToDAGISel *S;
  // Conversion type for interpreting results of a 32-bit instruction as
  // a 64-bit value or vice versa.
  enum ExtOrTruncConversion { Ext, Trunc };

  // Modifiers to guide how an ISD::SETCC node's result is to be computed
  // in a GPR.
  // ZExtOrig - use the original condition code, zero-extend value
  // ZExtInvert - invert the condition code, zero-extend value
  // SExtOrig - use the original condition code, sign-extend value
  // SExtInvert - invert the condition code, sign-extend value
  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };

  // Comparisons against zero to emit GPR code sequences for. Each of these
  // sequences may need to be emitted for two or more equivalent patterns.
  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
  // matters as well as the extension type: sext (-1/0), zext (1/0).
  // GEZExt - (zext (LHS >= 0))
  // GESExt - (sext (LHS >= 0))
  // LEZExt - (zext (LHS <= 0))
  // LESExt - (sext (LHS <= 0))
  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };

  SDNode *tryEXTEND(SDNode *N);
  SDNode *tryLogicOpOfCompares(SDNode *N);
  SDValue computeLogicOpInGPR(SDValue LogicOp);
  SDValue signExtendInputIfNeeded(SDValue Input);
  SDValue zeroExtendInputIfNeeded(SDValue Input);
  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                         ZeroCompare CmpTy);
  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              int64_t RHSValue, SDLoc dl);
  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);

public:
  IntegerCompareEliminator(SelectionDAG *DAG,
                           PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
    assert(CurDAG->getTargetLoweringInfo()
           .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
           "Only expecting to use this on 64 bit targets.");
  }
  // Entry point: returns a replacement node, or nullptr if the CmpInGPR
  // policy or the node's shape rules the transformation out.
  SDNode *Select(SDNode *N) {
    if (CmpInGPR == ICGPR_None)
      return nullptr;
    switch (N->getOpcode()) {
    default: break;
    case ISD::ZERO_EXTEND:
      if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
          CmpInGPR == ICGPR_SextI64)
        return nullptr;
      LLVM_FALLTHROUGH;
    case ISD::SIGN_EXTEND:
      if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
          CmpInGPR == ICGPR_ZextI64)
        return nullptr;
      return tryEXTEND(N);
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      return tryLogicOpOfCompares(N);
    }
    return nullptr;
  }
};

static bool isLogicOp(unsigned Opc) {
  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
}
// The obvious case for wanting to keep the value in a GPR. Namely, the
// result of the comparison is actually needed in a GPR.
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
         N->getOpcode() == ISD::SIGN_EXTEND) &&
        "Expecting a zero/sign extend node!");
  SDValue WideRes;
  // If we are zero-extending the result of a logical operation on i1
  // values, we can keep the values in GPRs.
  if (isLogicOp(N->getOperand(0).getOpcode()) &&
      N->getOperand(0).getValueType() == MVT::i1 &&
      N->getOpcode() == ISD::ZERO_EXTEND)
    WideRes = computeLogicOpInGPR(N->getOperand(0));
  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
    return nullptr;
  else
    WideRes =
      getSETCCInGPR(N->getOperand(0),
                    N->getOpcode() == ISD::SIGN_EXTEND ?
                    SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);

  if (!WideRes)
    return nullptr;

  SDLoc dl(N);
  bool Input32Bit = WideRes.getValueType() == MVT::i32;
  bool Output32Bit = N->getValueType(0) == MVT::i32;

  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;

  // Reinterpret between 32- and 64-bit widths if the produced width does not
  // match the width the caller expects.
  SDValue ConvOp = WideRes;
  if (Input32Bit != Output32Bit)
    ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
                           ExtOrTruncConversion::Trunc);
  return ConvOp.getNode();
}

// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP.
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  if (N->getValueType(0) != MVT::i1)
    return nullptr;
  assert(isLogicOp(N->getOpcode()) &&
         "Expected a logic operation on setcc results.");
  // Lower the whole i1 expression tree into a GPR sequence first.
  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  if (!LoweredLogical)
    return nullptr;

  SDLoc dl(N);
  // computeLogicOpInGPR encodes (xor %a, -1) as (XORI8 %a, 1); for that case
  // we read the EQ bit (result == 0) instead of GT to realize the negation.
  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  SDValue LHS = LoweredLogical.getOperand(0);
  SDValue RHS = LoweredLogical.getOperand(1);
  SDValue WideOp;
  SDValue OpToConvToRecForm;

  // Look through any 32-bit to 64-bit implicit extend nodes to find the
  // opcode that is input to the XORI.
  if (IsBitwiseNegate &&
      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  else if (IsBitwiseNegate)
    // If the input to the XORI isn't an extension, that's what we're after.
    OpToConvToRecForm = LoweredLogical.getOperand(0);
  else
    // If this is not an XORI, it is a reg-reg logical op and we can convert
    // it to record-form.
    OpToConvToRecForm = LoweredLogical;

  // Get the record-form version of the node we're looking to use to get the
  // CR result from.
  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

  // Convert the right node to record-form. This is either the logical we're
  // looking at or it is the input node to the negation (if we're looking at
  // a bitwise negation).
  if (NewOpc != -1 && IsBitwiseNegate) {
    // The input to the XORI has a record-form. Use it.
    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
           "Expected a PPC::XORI8 only for bitwise negation.");
    // Emit the record-form instruction, re-using the original operands.
    std::vector<SDValue> Ops;
    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
      Ops.push_back(OpToConvToRecForm.getOperand(i));

    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc, dl,
                                     OpToConvToRecForm.getValueType(),
                                     MVT::Glue, Ops), 0);
  } else {
    assert((NewOpc != -1 || !IsBitwiseNegate) &&
           "No record form available for AND8/OR8/XOR8?");
    // Fall back to ANDIo8 (andi. with immediate 1) when no record form
    // exists for the op itself.
    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDIo8 : NewOpc, dl,
                                     MVT::i64, MVT::Glue, LHS, RHS), 0);
  }

  // Select this node to a single bit from CR0 set by the record-form node
  // just created. For bitwise negation, use the EQ bit which is the equivalent
  // of negating the result (i.e. it is a bit set when the result of the
  // operation is zero).
  SDValue SRIdxVal =
    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  SDValue CRBit =
    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                   MVT::i1, CR0Reg, SRIdxVal,
                                   WideOp.getValue(1)), 0);
  return CRBit.getNode();
}

// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  assert(isLogicOp(LogicOp.getOpcode()) &&
        "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

  // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      // Truncate to i1 by keeping only the low bit (rldicl by 0, mask 63).
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (isLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND: NewOpc = PPC::AND8; break;
  case ISD::OR:  NewOpc = PPC::OR8;  break;
  case ISD::XOR: NewOpc = PPC::XOR8; break;
  }

  if (IsBitwiseNegation) {
    // Negate an i1 by xor-ing with an immediate 1 rather than -1.
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}

/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only sign-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The value was sign extended and then truncated to 32-bits. No need to
  // sign extend it again.
  if (Opc == ISD::TRUNCATE &&
      (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
       Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a sign-extending load. All ppc sign-extending loads
  // sign-extend to the full 64-bits.
  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  // We don't sign-extend constants.
  if (InputConst)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  SDLoc dl(Input);
  SignExtensionsAdded++;
  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
                                        MVT::i64, Input), 0);
}

/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
/// Otherwise just reinterpret it as a 64-bit value.
/// Useful when emitting comparison code for 32-bit values without using
/// the compare instruction (which only considers the lower 32-bits).
SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
  assert(Input.getValueType() == MVT::i32 &&
         "Can only zero-extend 32-bit values here.");
  unsigned Opc = Input.getOpcode();

  // The only condition under which we can omit the actual extend instruction:
  // - The value is a positive constant
  // - The value comes from a load that isn't a sign-extending load
  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
    (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
     Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
  if (IsTruncateOfZExt)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
  if (InputConst && InputConst->getSExtValue() >= 0)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
  // The input is a load that doesn't sign-extend (it will be zero-extended).
  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
    return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);

  // None of the above, need to zero-extend.
  SDLoc dl(Input);
  ZeroExtensionsAdded++;
  // Clear the upper 32 bits: rldicl by 0 with a 32-bit mask.
  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
                                        S->getI64Imm(0, dl),
                                        S->getI64Imm(32, dl)), 0);
}

// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
// course not actual zero/sign extensions that will generate machine code,
// they're just a way to reinterpret a 32 bit value in a register as a
// 64 bit value and vice-versa.
SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
                                                ExtOrTruncConversion Conv) {
  SDLoc dl(NatWidthRes);

  // For reinterpreting 32-bit values as 64 bit values, we generate
  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
  if (Conv == ExtOrTruncConversion::Ext) {
    SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
    SDValue SubRegIdx =
      CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
    return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
                                          ImDef, NatWidthRes, SubRegIdx), 0);
  }

  assert(Conv == ExtOrTruncConversion::Trunc &&
         "Unknown conversion between 32 and 64 bit values.");
  // For reinterpreting 64-bit values as 32-bit values, we just need to
  // EXTRACT_SUBREG (i.e. extract the low word).
  SDValue SubRegIdx =
    CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
                                        NatWidthRes, SubRegIdx), 0);
}

// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) {
  EVT InVT = LHS.getValueType();
  bool Is32Bit = InVT == MVT::i32;
  SDValue ToExtend;

  // Produce the value that needs to be either zero or sign extended.
  switch (CmpTy) {
  case ZeroCompare::GEZExt:
  case ZeroCompare::GESExt:
    // (a >= 0) iff the sign bit of (nor a, a) == ~a is set.
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
                                              dl, InVT, LHS, LHS), 0);
    break;
  case ZeroCompare::LEZExt:
  case ZeroCompare::LESExt: {
    if (Is32Bit) {
      // Upper 32 bits cannot be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      // (a <= 0) iff the sign bit of -a is clear; extract it with a rotate.
      ToExtend =
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                       Neg, S->getI64Imm(1, dl),
                                       S->getI64Imm(63, dl)), 0);
    } else {
      // (a <= 0) iff the sign bit of ((a - 1) | a) is set.
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                                Addi, LHS), 0);
    }
    break;
  }
  }

  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          ToExtend, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
                                          S->getI64Imm(63, dl)), 0);

  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  // For 32-bit sequences, the extensions differ between GE/LE cases.
  switch (CmpTy) {
  case ZeroCompare::GEZExt: {
    // Extract the sign bit (bit 0 after left-rotating by 1).
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                           S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ZeroCompare::GESExt:
    // Replicate the sign bit across the word.
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
                                          S->getI32Imm(31, dl)), 0);
  case ZeroCompare::LEZExt:
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(1, dl)), 0);
  case ZeroCompare::LESExt:
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(-1, dl)), 0);
  }

  // The above case covers all the enumerators so it can't have a default clause
  // to avoid compiler warnings.
  llvm_unreachable("Unknown zero-comparison type.");
}

/// Produces a zero-extended result of comparing two 32-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
      CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
    // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
      S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ISD::SETNE: {
    // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
    // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
      S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                          S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETGE: {
    // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
    if(IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
    // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
    if(IsRHSZero) {
      // NOTE(review): this inner check is redundant — the identical check at
      // the top of this case has already returned. Kept as-is.
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    }

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue Sub =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
                                     S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
              0);
    return
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
                                     MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
  }
  case ISD::SETGT: {
    // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
    // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
    // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
    // Handle SETLT -1 (which is equivalent to SETGE 0).
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);

    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                     Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
    // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
    // Handle SETLT 1 (which is equivalent to SETLE 0).
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    }

    if (IsRHSZero) {
      SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                             S->getI32Imm(31, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                            ShiftOps), 0);
    }

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          SUBFNode, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
    // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
2745 LHS = zeroExtendInputIfNeeded(LHS); 2746 RHS = zeroExtendInputIfNeeded(RHS); 2747 SDValue Subtract = 2748 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); 2749 SDValue SrdiNode = 2750 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 2751 Subtract, S->getI64Imm(1, dl), 2752 S->getI64Imm(63, dl)), 0); 2753 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, 2754 S->getI32Imm(1, dl)), 0); 2755 } 2756 case ISD::SETUGT: 2757 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) 2758 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) 2759 std::swap(LHS, RHS); 2760 LLVM_FALLTHROUGH; 2761 case ISD::SETULT: { 2762 if (CmpInGPR == ICGPR_NonExtIn) 2763 return SDValue(); 2764 // The upper 32-bits of the register can't be undefined for this sequence. 2765 LHS = zeroExtendInputIfNeeded(LHS); 2766 RHS = zeroExtendInputIfNeeded(RHS); 2767 SDValue Subtract = 2768 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); 2769 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 2770 Subtract, S->getI64Imm(1, dl), 2771 S->getI64Imm(63, dl)), 0); 2772 } 2773 } 2774 } 2775 2776 /// Produces a sign-extended result of comparing two 32-bit values according to 2777 /// the passed condition code. 2778 SDValue 2779 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, 2780 ISD::CondCode CC, 2781 int64_t RHSValue, SDLoc dl) { 2782 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || 2783 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) 2784 return SDValue(); 2785 bool IsRHSZero = RHSValue == 0; 2786 bool IsRHSOne = RHSValue == 1; 2787 bool IsRHSNegOne = RHSValue == -1LL; 2788 2789 switch (CC) { 2790 default: return SDValue(); 2791 case ISD::SETEQ: { 2792 // (sext (setcc %a, %b, seteq)) -> 2793 // (ashr (shl (ctlz (xor %a, %b)), 58), 63) 2794 // (sext (setcc %a, 0, seteq)) -> 2795 // (ashr (shl (ctlz %a), 58), 63) 2796 SDValue CountInput = IsRHSZero ? 
      LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Cntlzw =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
    SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
                         S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Slwi =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
    // The zero-extended i1 is in the low bit; negate to sign-extend it
    // (0 -> 0, 1 -> -1).
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  }
  case ISD::SETNE: {
    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
    // flip the bit, finally take 2's complement.
    // (sext (setcc %a, %b, setne)) ->
    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
    // Same as above, but the first xor is not needed.
    // (sext (setcc %a, 0, setne)) ->
    //   (neg (xor (lshr (ctlz %a), 5), 1))
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] =
      { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    SDValue Xori =
      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                     S->getI32Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  }
  case ISD::SETGE: {
    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    // Re-derive the RHS-is-zero fact for the swapped operands.
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
    // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 0);
    SDValue Srdi =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     SUBFNode, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETGT: {
    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
    // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
                                            S->getI64Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    // Refresh both constant facts for the swapped RHS before the SETLT
    // special cases below.
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
    // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
    // Handle SETLT 1 (which is equivalent to SETLE 0).
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    }
    if (IsRHSZero)
      // Comparing against zero: smear the sign bit of LHS.
      return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
                                            S->getI32Imm(31, dl)), 0);

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          SUBFNode, S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
                                     S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
              0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETUGT:
    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          Subtract, S->getI64Imm(63, dl)), 0);
  }
  }
}

/// Produces a zero-extended result of comparing two 64-bit values according to
/// the passed condition code.
/// Parameters are as for get32BitZExtCompare, but the inputs are i64 so no
/// input extension is ever required.
SDValue
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Respect the CmpInGPR restriction: this helper produces a 64-bit,
  // zero-extended result.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
    // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
    SDValue Xor = IsRHSZero ?
      LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
    // cntlzd yields 64 iff the input is zero; bit 6 of the count is the
    // equality result.
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
                                          S->getI64Imm(58, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETNE: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    // addic with -1 sets CA iff the input was non-zero; the subfe then
    // materializes that carry as the 0/1 result.
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue AC =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
                                          Xor, AC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setge)) ->
    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    // Handle (%a >= %b) as (%b <= %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    // Re-derive the RHS-is-zero fact for the swapped operands.
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setle)) ->
    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
    // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    // Result 1 of the subtract-carrying node is the CA flag (glue-typed).
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                          ShiftR, ShiftL, SubtractCarry), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setgt)) ->
    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    if (IsRHSZero) {
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    // Refresh both constant facts for the swapped RHS.
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setlt)) ->
    //   (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                            S->getI64Imm(1, dl),
                                            S->getI64Imm(63, dl)), 0);
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                          ADDE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    // sube x, x, CA yields 0 or -1 depending on the carry; add 1 gives 0/1.
    SDValue SUBFE8Node =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
                                     LHS, LHS, SUBFC8Carry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
                                          SUBFE8Node, S->getI64Imm(1, dl)), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                     LHS, LHS, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          ExtSub), 0);
  }
  }
}

/// Produces a sign-extended result of comparing two 64-bit values according to
/// the passed condition code.
SDValue
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  // Respect the CmpInGPR restriction: this helper produces a 64-bit,
  // sign-extended (0 / -1) result.
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue AddInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Addic =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     AddInput, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
                                          Addic, Addic.getValue(1)), 0);
  }
  case ISD::SETNE: {
    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue SC =
      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(0, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
                                          SC, SC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setge)) ->
    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
    // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    // Handle (%a >= %b) as (%b <= %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    // Re-derive the RHS-is-zero fact for the swapped operands.
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setle)) ->
    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
    // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    // Result 1 of the subtract-carrying node is the CA flag (glue-typed).
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue Adde =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     ShiftR, ShiftL, SubtractCarry), 0);
    // Negate the 0/1 adde result to sign-extend it to 0/-1.
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setgt)) ->
    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      SDValue Add =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(-1, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
                                            S->getI64Imm(63, dl)), 0);
    }
    // Handle (%a > %b) as (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    // Refresh both constant facts for the swapped RHS.
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setlt)) ->
    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    if (IsRHSZero) {
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
                                            S->getI64Imm(63, dl)), 0);
    }
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    SDValue XORI8Node =
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          XORI8Node), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
                                     LHS, SubtractCarry), 0);
    // nor x, x == ~x.
    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
                                          ExtSub, ExtSub), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
    SDValue SubCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                          LHS, LHS, SubCarry), 0);
  }
  }
}

/// Do all uses of this SDValue need the result in a GPR?
/// This is meant to be used on values that have type i1 since
/// it is somewhat meaningless to ask if values of other types
/// should be kept in GPR's.
static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
  assert(Compare.getOpcode() == ISD::SETCC &&
         "An ISD::SETCC node required here.");

  // For values that have a single use, the caller should obviously already have
  // checked if that use is an extending use. We check the other uses here.
  if (Compare.hasOneUse())
    return true;
  // We want the value in a GPR if it is being extended, used for a select, or
  // used in logical operations.
  // Any user that is not an extend, a select, or a logical op would need the
  // i1 result in a condition register, so keeping it in a GPR isn't
  // profitable.
  for (auto CompareUse : Compare.getNode()->uses())
    if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
        CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
        CompareUse->getOpcode() != ISD::SELECT &&
        !isLogicOp(CompareUse->getOpcode())) {
      OmittedForNonExtendUses++;
      return false;
    }
  return true;
}

/// Returns an equivalent of a SETCC node but with the result the same width as
/// the inputs. This can also be used for SELECT_CC if either the true or false
/// values is a power of two while the other is zero.
SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
                                                SetccInGPROpts ConvOpts) {
  assert((Compare.getOpcode() == ISD::SETCC ||
          Compare.getOpcode() == ISD::SELECT_CC) &&
         "An ISD::SETCC node required here.");

  // Don't convert this comparison to a GPR sequence because there are uses
  // of the i1 result (i.e. uses that require the result in the CR).
  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
    return SDValue();

  SDValue LHS = Compare.getOperand(0);
  SDValue RHS = Compare.getOperand(1);

  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
  ISD::CondCode CC =
    cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
  EVT InputVT = LHS.getValueType();
  // Only 32- and 64-bit integer comparisons are handled.
  if (InputVT != MVT::i32 && InputVT != MVT::i64)
    return SDValue();

  // If the caller asked for the inverted result, fold the inversion into the
  // condition code instead of emitting an extra xor.
  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
      ConvOpts == SetccInGPROpts::SExtInvert)
    CC = ISD::getSetCCInverse(CC, true);

  bool Inputs32Bit = InputVT == MVT::i32;

  SDLoc dl(Compare);
  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
  // INT64_MAX acts as a "not a constant" sentinel: the helpers only compare
  // RHSValue against 0, 1 and -1.
  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
    ConvOpts == SetccInGPROpts::SExtInvert;

  // Dispatch on the input width and the requested extension kind.
  if (IsSext && Inputs32Bit)
    return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  else if (Inputs32Bit)
    return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
  else if (IsSext)
    return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
}

} // end anonymous namespace

/// Try to eliminate an i1 comparison feeding this node by computing the
/// comparison directly in GPRs. Returns true (and replaces N) on success.
bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
  // Only integer-valued nodes are candidates.
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return false;

  // This optimization will emit code that assumes 64-bit registers
  // so we don't want to run it in 32-bit mode. Also don't run it
  // on functions that are not to be optimized.
  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
    return false;

  switch (N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR: {
    IntegerCompareEliminator ICmpElim(CurDAG, this);
    if (SDNode *New = ICmpElim.Select(N)) {
      ReplaceNode(N, New);
      return true;
    }
  }
  }
  return false;
}

/// Try to match this node (a rotate/shift/and/or) with the bit-permutation
/// selector. Returns true (and replaces N) on success.
bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
  // Only integer-valued nodes are candidates.
  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return false;

  // Respect the command-line override that disables the rewriter.
  if (!UseBitPermRewriter)
    return false;

  switch (N->getOpcode()) {
  default: break;
  case ISD::ROTL:
  case ISD::SHL:
  case ISD::SRL:
  case ISD::AND:
  case ISD::OR: {
    BitPermutationSelector BPS(CurDAG);
    if (SDNode *New = BPS.Select(N)) {
      ReplaceNode(N, New);
      return true;
    }
    return false;
  }
  }

  return false;
}

/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                  const SDLoc &dl) {
  // Always select the LHS.
  unsigned Opc;

  if (LHS.getValueType() == MVT::i32) {
    unsigned Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt32Immediate(RHS, Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>((int)Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpw cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmplwi cr0,r0,0x5678
        //   beq cr0,L6
        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
                                           getI32Imm(Imm >> 16, dl)), 0);
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      }
      Opc = PPC::CMPLW;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned comparison: fold a 16-bit unsigned immediate if possible.
      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLW;
    } else {
      // Signed comparison: fold a 16-bit signed immediate if possible.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                              getI32Imm((int)SImm & 0xFFFF,
                                                        dl)),
                       0);
      Opc = PPC::CMPW;
    }
  } else if (LHS.getValueType() == MVT::i64) {
    uint64_t Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt64Immediate(RHS.getNode(), Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpd cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmpldi cr0,r0,0x5678
        //   beq cr0,L6
        // The xoris trick only clears bits 16-31 of the immediate, so it only
        // applies when the upper 32 bits of the constant are zero.
        if (isUInt<32>(Imm)) {
          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
                                             getI64Imm(Imm >> 16, dl)), 0);
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
                                                getI64Imm(Imm & 0xFFFF, dl)),
                         0);
        }
      }
      Opc = PPC::CMPLD;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned comparison: fold a 16-bit unsigned immediate if possible.
      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLD;
    } else {
      // Signed comparison: fold a 16-bit signed immediate if possible.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                              getI64Imm(SImm & 0xFFFF, dl)),
                       0);
      Opc = PPC::CMPD;
    }
  } else if (LHS.getValueType() == MVT::f32) {
    Opc = PPC::FCMPUS;
  } else {
    assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
    // Prefer the VSX compare when the subtarget has it.
    Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  }
  // No immediate folded: emit a register-register compare.
  return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}

/// Convert an ISD condition code to the corresponding PPC::Predicate.
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
  switch (CC) {
  case ISD::SETUEQ:
  case ISD::SETONE:
  case ISD::SETOLE:
  case ISD::SETOGE:
    llvm_unreachable("Should be lowered by legalize!");
  default: llvm_unreachable("Unknown condition!");
  case ISD::SETOEQ:
  case ISD::SETEQ:  return PPC::PRED_EQ;
  case ISD::SETUNE:
  case ISD::SETNE:  return PPC::PRED_NE;
  case ISD::SETOLT:
  case ISD::SETLT:  return PPC::PRED_LT;
  case ISD::SETULE:
  case ISD::SETLE:  return PPC::PRED_LE;
  case ISD::SETOGT:
  case ISD::SETGT:  return PPC::PRED_GT;
  case ISD::SETUGE:
  case ISD::SETGE:  return PPC::PRED_GE;
  case ISD::SETO:   return PPC::PRED_NU;
  case ISD::SETUO:  return PPC::PRED_UN;
  // These two are invalid for floating point. Assume we have int.
  case ISD::SETULT: return PPC::PRED_LT;
  case ISD::SETUGT: return PPC::PRED_GT;
  }
}

/// getCRIdxForSetCC - Return the index of the condition register field
/// associated with the SetCC condition, and whether or not the field is
/// treated as inverted. That is, lt = 0; ge = 0 inverted.
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
  Invert = false;
  switch (CC) {
  default: llvm_unreachable("Unknown condition!");
  case ISD::SETOLT:
  case ISD::SETLT:  return 0;                  // Bit #0 = SETOLT
  case ISD::SETOGT:
  case ISD::SETGT:  return 1;                  // Bit #1 = SETOGT
  case ISD::SETOEQ:
  case ISD::SETEQ:  return 2;                  // Bit #2 = SETOEQ
  case ISD::SETUO:  return 3;                  // Bit #3 = SETUO
  case ISD::SETUGE:
  case ISD::SETGE:  Invert = true; return 0;   // !Bit #0 = SETUGE
  case ISD::SETULE:
  case ISD::SETLE:  Invert = true; return 1;   // !Bit #1 = SETULE
  case ISD::SETUNE:
  case ISD::SETNE:  Invert = true; return 2;   // !Bit #2 = SETUNE
  case ISD::SETO:   Invert = true; return 3;   // !Bit #3 = SETO
  case ISD::SETUEQ:
  case ISD::SETOGE:
  case ISD::SETOLE:
  case ISD::SETONE:
    llvm_unreachable("Invalid branch code: should be expanded by legalize");
  // These are invalid for floating point.  Assume integer.
  case ISD::SETULT: return 0;
  case ISD::SETUGT: return 1;
  }
}

// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32).
// Conditions without a direct instruction are canonicalized first by swapping
// the operands (Swap) and/or negating the result (Negate); the caller is
// responsible for applying both transformations.
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
                                bool HasVSX, bool &Swap, bool &Negate) {
  Swap = false;
  Negate = false;

  if (VecVT.isFloatingPoint()) {
    /* Handle some cases by swapping input operands. */
    switch (CC) {
    case ISD::SETLE:  CC = ISD::SETGE;  Swap = true; break;
    case ISD::SETLT:  CC = ISD::SETGT;  Swap = true; break;
    case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
    case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
    case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
    case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
    default: break;
    }
    /* Handle some cases by negating the result. */
    switch (CC) {
    case ISD::SETNE:  CC = ISD::SETEQ;  Negate = true; break;
    case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
    case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
    case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
    default: break;
    }
    /* We have instructions implementing the remaining cases. */
    switch (CC) {
    case ISD::SETEQ:
    case ISD::SETOEQ:
      if (VecVT == MVT::v4f32)
        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
      else if (VecVT == MVT::v2f64)
        return PPC::XVCMPEQDP;
      break;
    case ISD::SETGT:
    case ISD::SETOGT:
      if (VecVT == MVT::v4f32)
        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
      else if (VecVT == MVT::v2f64)
        return PPC::XVCMPGTDP;
      break;
    case ISD::SETGE:
    case ISD::SETOGE:
      if (VecVT == MVT::v4f32)
        return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
      else if (VecVT == MVT::v2f64)
        return PPC::XVCMPGEDP;
      break;
    default:
      break;
    }
    llvm_unreachable("Invalid floating-point vector compare condition");
  } else {
    /* Handle some cases by swapping input operands. */
    switch (CC) {
    case ISD::SETGE:  CC = ISD::SETLE;  Swap = true; break;
    case ISD::SETLT:  CC = ISD::SETGT;  Swap = true; break;
    case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
    case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
    default: break;
    }
    /* Handle some cases by negating the result. */
    switch (CC) {
    case ISD::SETNE:  CC = ISD::SETEQ;  Negate = true; break;
    case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
    case ISD::SETLE:  CC = ISD::SETGT;  Negate = true; break;
    case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
    default: break;
    }
    /* We have instructions implementing the remaining cases. */
    switch (CC) {
    case ISD::SETEQ:
    case ISD::SETUEQ:
      if (VecVT == MVT::v16i8)
        return PPC::VCMPEQUB;
      else if (VecVT == MVT::v8i16)
        return PPC::VCMPEQUH;
      else if (VecVT == MVT::v4i32)
        return PPC::VCMPEQUW;
      else if (VecVT == MVT::v2i64)
        return PPC::VCMPEQUD;
      break;
    case ISD::SETGT:
      if (VecVT == MVT::v16i8)
        return PPC::VCMPGTSB;
      else if (VecVT == MVT::v8i16)
        return PPC::VCMPGTSH;
      else if (VecVT == MVT::v4i32)
        return PPC::VCMPGTSW;
      else if (VecVT == MVT::v2i64)
        return PPC::VCMPGTSD;
      break;
    case ISD::SETUGT:
      if (VecVT == MVT::v16i8)
        return PPC::VCMPGTUB;
      else if (VecVT == MVT::v8i16)
        return PPC::VCMPGTUH;
      else if (VecVT == MVT::v4i32)
        return PPC::VCMPGTUW;
      else if (VecVT == MVT::v2i64)
        return PPC::VCMPGTUD;
      break;
    default:
      break;
    }
    llvm_unreachable("Invalid integer vector compare condition");
  }
}

/// Try to select an ISD::SETCC node. Handles scalar immediate special cases,
/// vector compares, and the generic CR-field extraction sequence.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);

  if (!PPCSubTarget->useCRBits() &&
      isInt32Immediate(N->getOperand(1), Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // (Op == 0): cntlzw yields 32 exactly when Op is zero; extract bit 5
        // of the count (only set for 32) with rlwinm.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        // 32-bit only: addic Op, -1 + subfe computes the carry-based
        // (Op != 0) result without a compare.
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // (Op < 0): rotate the sign bit into the low bit position.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (Op > 0): sign bit of (-Op) & ~Op, extracted via rlwinm.
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) {        // setcc op, -1
      SDValue Op = N->getOperand(0);
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        // 32-bit only: (Op == -1) via the carry out of Op + 1.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        // 32-bit only: invert Op (nor), then reuse the (x != 0) carry trick.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // (Op < -1): sign bit of (Op + 1) & Op.
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (Op > -1): inverted sign bit (rlwinm then xori 1).
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (LHS.getValueType().isVector()) {
    if (PPCSubTarget->hasQPX())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
                                        PPCSubTarget->hasVSX(), Swap, Negate);
    // Apply the canonicalizations getVCmpInst requested.
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Negate by NOR'ing the compare result with itself.
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  // When CR bits are in use, setcc is matched by patterns elsewhere.
  if (PPCSubTarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
  SDValue IntCR;

  // Force the ccreg into CR7.
  return AddrOp.getOpcode() == ISD::CopyFromReg;
}

/// Copy the memory operand from memory node \p N onto the freshly selected
/// machine node \p Result so alias and volatility info is preserved.
void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  // Transfer memoperands.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
  cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
}

// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
void PPCDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // In case any misguided DAG-level optimizations form an ADD with a
  // TargetConstant operand, crash here instead of miscompiling (by selecting
  // an r+r add instead of some kind of r+i add).
  if (N->getOpcode() == ISD::ADD &&
      N->getOperand(1).getOpcode() == ISD::TargetConstant)
    llvm_unreachable("Invalid ADD with TargetConstant operand");

  // Try matching complex bit permutations before doing anything else.
  if (tryBitPermutation(N))
    return;

  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
3879 if (tryIntCompareInGPR(N)) 3880 return; 3881 3882 switch (N->getOpcode()) { 3883 default: break; 3884 3885 case ISD::Constant: 3886 if (N->getValueType(0) == MVT::i64) { 3887 ReplaceNode(N, selectI64Imm(CurDAG, N)); 3888 return; 3889 } 3890 break; 3891 3892 case ISD::SETCC: 3893 if (trySETCC(N)) 3894 return; 3895 break; 3896 3897 case PPCISD::GlobalBaseReg: 3898 ReplaceNode(N, getGlobalBaseReg()); 3899 return; 3900 3901 case ISD::FrameIndex: 3902 selectFrameIndex(N, N); 3903 return; 3904 3905 case PPCISD::MFOCRF: { 3906 SDValue InFlag = N->getOperand(1); 3907 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, 3908 N->getOperand(0), InFlag)); 3909 return; 3910 } 3911 3912 case PPCISD::READ_TIME_BASE: 3913 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, 3914 MVT::Other, N->getOperand(0))); 3915 return; 3916 3917 case PPCISD::SRA_ADDZE: { 3918 SDValue N0 = N->getOperand(0); 3919 SDValue ShiftAmt = 3920 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))-> 3921 getConstantIntValue(), dl, 3922 N->getValueType(0)); 3923 if (N->getValueType(0) == MVT::i64) { 3924 SDNode *Op = 3925 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, 3926 N0, ShiftAmt); 3927 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), 3928 SDValue(Op, 1)); 3929 return; 3930 } else { 3931 assert(N->getValueType(0) == MVT::i32 && 3932 "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); 3933 SDNode *Op = 3934 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, 3935 N0, ShiftAmt); 3936 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), 3937 SDValue(Op, 1)); 3938 return; 3939 } 3940 } 3941 3942 case ISD::LOAD: { 3943 // Handle preincrement loads. 3944 LoadSDNode *LD = cast<LoadSDNode>(N); 3945 EVT LoadedVT = LD->getMemoryVT(); 3946 3947 // Normal loads are handled by code generated from the .td file. 
3948 if (LD->getAddressingMode() != ISD::PRE_INC) 3949 break; 3950 3951 SDValue Offset = LD->getOffset(); 3952 if (Offset.getOpcode() == ISD::TargetConstant || 3953 Offset.getOpcode() == ISD::TargetGlobalAddress) { 3954 3955 unsigned Opcode; 3956 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 3957 if (LD->getValueType(0) != MVT::i64) { 3958 // Handle PPC32 integer and normal FP loads. 3959 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3960 switch (LoadedVT.getSimpleVT().SimpleTy) { 3961 default: llvm_unreachable("Invalid PPC load type!"); 3962 case MVT::f64: Opcode = PPC::LFDU; break; 3963 case MVT::f32: Opcode = PPC::LFSU; break; 3964 case MVT::i32: Opcode = PPC::LWZU; break; 3965 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break; 3966 case MVT::i1: 3967 case MVT::i8: Opcode = PPC::LBZU; break; 3968 } 3969 } else { 3970 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 3971 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3972 switch (LoadedVT.getSimpleVT().SimpleTy) { 3973 default: llvm_unreachable("Invalid PPC load type!"); 3974 case MVT::i64: Opcode = PPC::LDU; break; 3975 case MVT::i32: Opcode = PPC::LWZU8; break; 3976 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; 3977 case MVT::i1: 3978 case MVT::i8: Opcode = PPC::LBZU8; break; 3979 } 3980 } 3981 3982 SDValue Chain = LD->getChain(); 3983 SDValue Base = LD->getBasePtr(); 3984 SDValue Ops[] = { Offset, Base, Chain }; 3985 SDNode *MN = CurDAG->getMachineNode( 3986 Opcode, dl, LD->getValueType(0), 3987 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 3988 transferMemOperands(N, MN); 3989 ReplaceNode(N, MN); 3990 return; 3991 } else { 3992 unsigned Opcode; 3993 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 3994 if (LD->getValueType(0) != MVT::i64) { 3995 // Handle PPC32 integer and normal FP loads. 
3996 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 3997 switch (LoadedVT.getSimpleVT().SimpleTy) { 3998 default: llvm_unreachable("Invalid PPC load type!"); 3999 case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX 4000 case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX 4001 case MVT::f64: Opcode = PPC::LFDUX; break; 4002 case MVT::f32: Opcode = PPC::LFSUX; break; 4003 case MVT::i32: Opcode = PPC::LWZUX; break; 4004 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; 4005 case MVT::i1: 4006 case MVT::i8: Opcode = PPC::LBZUX; break; 4007 } 4008 } else { 4009 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 4010 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && 4011 "Invalid sext update load"); 4012 switch (LoadedVT.getSimpleVT().SimpleTy) { 4013 default: llvm_unreachable("Invalid PPC load type!"); 4014 case MVT::i64: Opcode = PPC::LDUX; break; 4015 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; 4016 case MVT::i16: Opcode = isSExt ? 
PPC::LHAUX8 : PPC::LHZUX8; break; 4017 case MVT::i1: 4018 case MVT::i8: Opcode = PPC::LBZUX8; break; 4019 } 4020 } 4021 4022 SDValue Chain = LD->getChain(); 4023 SDValue Base = LD->getBasePtr(); 4024 SDValue Ops[] = { Base, Offset, Chain }; 4025 SDNode *MN = CurDAG->getMachineNode( 4026 Opcode, dl, LD->getValueType(0), 4027 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 4028 transferMemOperands(N, MN); 4029 ReplaceNode(N, MN); 4030 return; 4031 } 4032 } 4033 4034 case ISD::AND: { 4035 unsigned Imm, Imm2, SH, MB, ME; 4036 uint64_t Imm64; 4037 4038 // If this is an and of a value rotated between 0 and 31 bits and then and'd 4039 // with a mask, emit rlwinm 4040 if (isInt32Immediate(N->getOperand(1), Imm) && 4041 isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) { 4042 SDValue Val = N->getOperand(0).getOperand(0); 4043 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl), 4044 getI32Imm(ME, dl) }; 4045 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4046 return; 4047 } 4048 // If this is just a masked value where the input is not handled above, and 4049 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm 4050 if (isInt32Immediate(N->getOperand(1), Imm) && 4051 isRunOfOnes(Imm, MB, ME) && 4052 N->getOperand(0).getOpcode() != ISD::ROTL) { 4053 SDValue Val = N->getOperand(0); 4054 SDValue Ops[] = { Val, getI32Imm(0, dl), getI32Imm(MB, dl), 4055 getI32Imm(ME, dl) }; 4056 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4057 return; 4058 } 4059 // If this is a 64-bit zero-extension mask, emit rldicl. 
4060 if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && 4061 isMask_64(Imm64)) { 4062 SDValue Val = N->getOperand(0); 4063 MB = 64 - countTrailingOnes(Imm64); 4064 SH = 0; 4065 4066 if (Val.getOpcode() == ISD::ANY_EXTEND) { 4067 auto Op0 = Val.getOperand(0); 4068 if ( Op0.getOpcode() == ISD::SRL && 4069 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { 4070 4071 auto ResultType = Val.getNode()->getValueType(0); 4072 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, 4073 ResultType); 4074 SDValue IDVal (ImDef, 0); 4075 4076 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, 4077 ResultType, IDVal, Op0.getOperand(0), 4078 getI32Imm(1, dl)), 0); 4079 SH = 64 - Imm; 4080 } 4081 } 4082 4083 // If the operand is a logical right shift, we can fold it into this 4084 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) 4085 // for n <= mb. The right shift is really a left rotate followed by a 4086 // mask, and this mask is a more-restrictive sub-mask of the mask implied 4087 // by the shift. 4088 if (Val.getOpcode() == ISD::SRL && 4089 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { 4090 assert(Imm < 64 && "Illegal shift amount"); 4091 Val = Val.getOperand(0); 4092 SH = 64 - Imm; 4093 } 4094 4095 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; 4096 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); 4097 return; 4098 } 4099 // If this is a negated 64-bit zero-extension mask, 4100 // i.e. the immediate is a sequence of ones from most significant side 4101 // and all zero for reminder, we should use rldicr. 4102 if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) && 4103 isMask_64(~Imm64)) { 4104 SDValue Val = N->getOperand(0); 4105 MB = 63 - countTrailingOnes(~Imm64); 4106 SH = 0; 4107 SDValue Ops[] = { Val, getI32Imm(SH, dl), getI32Imm(MB, dl) }; 4108 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); 4109 return; 4110 } 4111 4112 // AND X, 0 -> 0, not "rlwinm 32". 
4113 if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) { 4114 ReplaceUses(SDValue(N, 0), N->getOperand(1)); 4115 return; 4116 } 4117 // ISD::OR doesn't get all the bitfield insertion fun. 4118 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a 4119 // bitfield insert. 4120 if (isInt32Immediate(N->getOperand(1), Imm) && 4121 N->getOperand(0).getOpcode() == ISD::OR && 4122 isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) { 4123 // The idea here is to check whether this is equivalent to: 4124 // (c1 & m) | (x & ~m) 4125 // where m is a run-of-ones mask. The logic here is that, for each bit in 4126 // c1 and c2: 4127 // - if both are 1, then the output will be 1. 4128 // - if both are 0, then the output will be 0. 4129 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will 4130 // come from x. 4131 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will 4132 // be 0. 4133 // If that last condition is never the case, then we can form m from the 4134 // bits that are the same between c1 and c2. 4135 unsigned MB, ME; 4136 if (isRunOfOnes(~(Imm^Imm2), MB, ME) && !(~Imm & Imm2)) { 4137 SDValue Ops[] = { N->getOperand(0).getOperand(0), 4138 N->getOperand(0).getOperand(1), 4139 getI32Imm(0, dl), getI32Imm(MB, dl), 4140 getI32Imm(ME, dl) }; 4141 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 4142 return; 4143 } 4144 } 4145 4146 // Other cases are autogenerated. 4147 break; 4148 } 4149 case ISD::OR: { 4150 if (N->getValueType(0) == MVT::i32) 4151 if (tryBitfieldInsert(N)) 4152 return; 4153 4154 int16_t Imm; 4155 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 4156 isIntS16Immediate(N->getOperand(1), Imm)) { 4157 KnownBits LHSKnown; 4158 CurDAG->computeKnownBits(N->getOperand(0), LHSKnown); 4159 4160 // If this is equivalent to an add, then we can fold it with the 4161 // FrameIndex calculation. 
4162 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) { 4163 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 4164 return; 4165 } 4166 } 4167 4168 // OR with a 32-bit immediate can be handled by ori + oris 4169 // without creating an immediate in a GPR. 4170 uint64_t Imm64 = 0; 4171 bool IsPPC64 = PPCSubTarget->isPPC64(); 4172 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && 4173 (Imm64 & ~0xFFFFFFFFuLL) == 0) { 4174 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later. 4175 uint64_t ImmHi = Imm64 >> 16; 4176 uint64_t ImmLo = Imm64 & 0xFFFF; 4177 if (ImmHi != 0 && ImmLo != 0) { 4178 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, 4179 N->getOperand(0), 4180 getI16Imm(ImmLo, dl)); 4181 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; 4182 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1); 4183 return; 4184 } 4185 } 4186 4187 // Other cases are autogenerated. 4188 break; 4189 } 4190 case ISD::XOR: { 4191 // XOR with a 32-bit immediate can be handled by xori + xoris 4192 // without creating an immediate in a GPR. 4193 uint64_t Imm64 = 0; 4194 bool IsPPC64 = PPCSubTarget->isPPC64(); 4195 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) && 4196 (Imm64 & ~0xFFFFFFFFuLL) == 0) { 4197 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later. 
4198 uint64_t ImmHi = Imm64 >> 16; 4199 uint64_t ImmLo = Imm64 & 0xFFFF; 4200 if (ImmHi != 0 && ImmLo != 0) { 4201 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, 4202 N->getOperand(0), 4203 getI16Imm(ImmLo, dl)); 4204 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)}; 4205 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1); 4206 return; 4207 } 4208 } 4209 4210 break; 4211 } 4212 case ISD::ADD: { 4213 int16_t Imm; 4214 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 4215 isIntS16Immediate(N->getOperand(1), Imm)) { 4216 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm); 4217 return; 4218 } 4219 4220 break; 4221 } 4222 case ISD::SHL: { 4223 unsigned Imm, SH, MB, ME; 4224 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 4225 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 4226 SDValue Ops[] = { N->getOperand(0).getOperand(0), 4227 getI32Imm(SH, dl), getI32Imm(MB, dl), 4228 getI32Imm(ME, dl) }; 4229 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4230 return; 4231 } 4232 4233 // Other cases are autogenerated. 4234 break; 4235 } 4236 case ISD::SRL: { 4237 unsigned Imm, SH, MB, ME; 4238 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) && 4239 isRotateAndMask(N, Imm, true, SH, MB, ME)) { 4240 SDValue Ops[] = { N->getOperand(0).getOperand(0), 4241 getI32Imm(SH, dl), getI32Imm(MB, dl), 4242 getI32Imm(ME, dl) }; 4243 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4244 return; 4245 } 4246 4247 // Other cases are autogenerated. 4248 break; 4249 } 4250 // FIXME: Remove this once the ANDI glue bug is fixed: 4251 case PPCISD::ANDIo_1_EQ_BIT: 4252 case PPCISD::ANDIo_1_GT_BIT: { 4253 if (!ANDIGlueBug) 4254 break; 4255 4256 EVT InVT = N->getOperand(0).getValueType(); 4257 assert((InVT == MVT::i64 || InVT == MVT::i32) && 4258 "Invalid input type for ANDIo_1_EQ_BIT"); 4259 4260 unsigned Opcode = (InVT == MVT::i64) ? 
PPC::ANDIo8 : PPC::ANDIo; 4261 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue, 4262 N->getOperand(0), 4263 CurDAG->getTargetConstant(1, dl, InVT)), 4264 0); 4265 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 4266 SDValue SRIdxVal = 4267 CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ? 4268 PPC::sub_eq : PPC::sub_gt, dl, MVT::i32); 4269 4270 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg, 4271 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */); 4272 return; 4273 } 4274 case ISD::SELECT_CC: { 4275 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 4276 EVT PtrVT = 4277 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 4278 bool isPPC64 = (PtrVT == MVT::i64); 4279 4280 // If this is a select of i1 operands, we'll pattern match it. 4281 if (PPCSubTarget->useCRBits() && 4282 N->getOperand(0).getValueType() == MVT::i1) 4283 break; 4284 4285 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc 4286 if (!isPPC64) 4287 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 4288 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 4289 if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 4290 if (N1C->isNullValue() && N3C->isNullValue() && 4291 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE && 4292 // FIXME: Implement this optzn for PPC64. 4293 N->getValueType(0) == MVT::i32) { 4294 SDNode *Tmp = 4295 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4296 N->getOperand(0), getI32Imm(~0U, dl)); 4297 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0), 4298 N->getOperand(0), SDValue(Tmp, 1)); 4299 return; 4300 } 4301 4302 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl); 4303 4304 if (N->getValueType(0) == MVT::i1) { 4305 // An i1 select is: (c & t) | (!c & f). 
4306 bool Inv; 4307 unsigned Idx = getCRIdxForSetCC(CC, Inv); 4308 4309 unsigned SRI; 4310 switch (Idx) { 4311 default: llvm_unreachable("Invalid CC index"); 4312 case 0: SRI = PPC::sub_lt; break; 4313 case 1: SRI = PPC::sub_gt; break; 4314 case 2: SRI = PPC::sub_eq; break; 4315 case 3: SRI = PPC::sub_un; break; 4316 } 4317 4318 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg); 4319 4320 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1, 4321 CCBit, CCBit), 0); 4322 SDValue C = Inv ? NotCCBit : CCBit, 4323 NotC = Inv ? CCBit : NotCCBit; 4324 4325 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 4326 C, N->getOperand(2)), 0); 4327 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 4328 NotC, N->getOperand(3)), 0); 4329 4330 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); 4331 return; 4332 } 4333 4334 unsigned BROpc = getPredicateForSetCC(CC); 4335 4336 unsigned SelectCCOp; 4337 if (N->getValueType(0) == MVT::i32) 4338 SelectCCOp = PPC::SELECT_CC_I4; 4339 else if (N->getValueType(0) == MVT::i64) 4340 SelectCCOp = PPC::SELECT_CC_I8; 4341 else if (N->getValueType(0) == MVT::f32) 4342 if (PPCSubTarget->hasP8Vector()) 4343 SelectCCOp = PPC::SELECT_CC_VSSRC; 4344 else 4345 SelectCCOp = PPC::SELECT_CC_F4; 4346 else if (N->getValueType(0) == MVT::f64) 4347 if (PPCSubTarget->hasVSX()) 4348 SelectCCOp = PPC::SELECT_CC_VSFRC; 4349 else 4350 SelectCCOp = PPC::SELECT_CC_F8; 4351 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) 4352 SelectCCOp = PPC::SELECT_CC_QFRC; 4353 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) 4354 SelectCCOp = PPC::SELECT_CC_QSRC; 4355 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) 4356 SelectCCOp = PPC::SELECT_CC_QBRC; 4357 else if (N->getValueType(0) == MVT::v2f64 || 4358 N->getValueType(0) == MVT::v2i64) 4359 SelectCCOp = PPC::SELECT_CC_VSRC; 4360 else 4361 SelectCCOp = PPC::SELECT_CC_VRRC; 4362 4363 SDValue Ops[] 
= { CCReg, N->getOperand(2), N->getOperand(3), 4364 getI32Imm(BROpc, dl) }; 4365 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); 4366 return; 4367 } 4368 case ISD::VSELECT: 4369 if (PPCSubTarget->hasVSX()) { 4370 SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) }; 4371 CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops); 4372 return; 4373 } 4374 break; 4375 4376 case ISD::VECTOR_SHUFFLE: 4377 if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || 4378 N->getValueType(0) == MVT::v2i64)) { 4379 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 4380 4381 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), 4382 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); 4383 unsigned DM[2]; 4384 4385 for (int i = 0; i < 2; ++i) 4386 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) 4387 DM[i] = 0; 4388 else 4389 DM[i] = 1; 4390 4391 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && 4392 Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && 4393 isa<LoadSDNode>(Op1.getOperand(0))) { 4394 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); 4395 SDValue Base, Offset; 4396 4397 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && 4398 (LD->getMemoryVT() == MVT::f64 || 4399 LD->getMemoryVT() == MVT::i64) && 4400 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { 4401 SDValue Chain = LD->getChain(); 4402 SDValue Ops[] = { Base, Offset, Chain }; 4403 MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); 4404 MemOp[0] = LD->getMemOperand(); 4405 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, 4406 N->getValueType(0), Ops); 4407 cast<MachineSDNode>(NewN)->setMemRefs(MemOp, MemOp + 1); 4408 return; 4409 } 4410 } 4411 4412 // For little endian, we must swap the input operands and adjust 4413 // the mask elements (reverse and invert them). 
4414 if (PPCSubTarget->isLittleEndian()) { 4415 std::swap(Op1, Op2); 4416 unsigned tmp = DM[0]; 4417 DM[0] = 1 - DM[1]; 4418 DM[1] = 1 - tmp; 4419 } 4420 4421 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, 4422 MVT::i32); 4423 SDValue Ops[] = { Op1, Op2, DMV }; 4424 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); 4425 return; 4426 } 4427 4428 break; 4429 case PPCISD::BDNZ: 4430 case PPCISD::BDZ: { 4431 bool IsPPC64 = PPCSubTarget->isPPC64(); 4432 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; 4433 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ 4434 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) 4435 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), 4436 MVT::Other, Ops); 4437 return; 4438 } 4439 case PPCISD::COND_BRANCH: { 4440 // Op #0 is the Chain. 4441 // Op #1 is the PPC::PRED_* number. 4442 // Op #2 is the CR# 4443 // Op #3 is the Dest MBB 4444 // Op #4 is the Flag. 4445 // Prevent PPC::PRED_* from being selected into LI. 4446 unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4447 if (EnableBranchHint) 4448 PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(3)); 4449 4450 SDValue Pred = getI32Imm(PCC, dl); 4451 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), 4452 N->getOperand(0), N->getOperand(4) }; 4453 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 4454 return; 4455 } 4456 case ISD::BR_CC: { 4457 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 4458 unsigned PCC = getPredicateForSetCC(CC); 4459 4460 if (N->getOperand(2).getValueType() == MVT::i1) { 4461 unsigned Opc; 4462 bool Swap; 4463 switch (PCC) { 4464 default: llvm_unreachable("Unexpected Boolean-operand predicate"); 4465 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; 4466 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; 4467 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; 4468 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; 4469 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; 
break; 4470 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; 4471 } 4472 4473 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, 4474 N->getOperand(Swap ? 3 : 2), 4475 N->getOperand(Swap ? 2 : 3)), 0); 4476 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), 4477 N->getOperand(0)); 4478 return; 4479 } 4480 4481 if (EnableBranchHint) 4482 PCC |= getBranchHint(PCC, FuncInfo, N->getOperand(4)); 4483 4484 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); 4485 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, 4486 N->getOperand(4), N->getOperand(0) }; 4487 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 4488 return; 4489 } 4490 case ISD::BRIND: { 4491 // FIXME: Should custom lower this. 4492 SDValue Chain = N->getOperand(0); 4493 SDValue Target = N->getOperand(1); 4494 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; 4495 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8; 4496 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, 4497 Chain), 0); 4498 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); 4499 return; 4500 } 4501 case PPCISD::TOC_ENTRY: { 4502 assert ((PPCSubTarget->isPPC64() || PPCSubTarget->isSVR4ABI()) && 4503 "Only supported for 64-bit ABI and 32-bit SVR4"); 4504 if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) { 4505 SDValue GA = N->getOperand(0); 4506 SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, 4507 N->getOperand(1)); 4508 transferMemOperands(N, MN); 4509 ReplaceNode(N, MN); 4510 return; 4511 } 4512 4513 // For medium and large code model, we generate two instructions as 4514 // described below. Otherwise we allow SelectCodeCommon to handle this, 4515 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. 
4516 CodeModel::Model CModel = TM.getCodeModel(); 4517 if (CModel != CodeModel::Medium && CModel != CodeModel::Large) 4518 break; 4519 4520 // The first source operand is a TargetGlobalAddress or a TargetJumpTable. 4521 // If it must be toc-referenced according to PPCSubTarget, we generate: 4522 // LDtocL(@sym, ADDIStocHA(%x2, @sym)) 4523 // Otherwise we generate: 4524 // ADDItocL(ADDIStocHA(%x2, @sym), @sym) 4525 SDValue GA = N->getOperand(0); 4526 SDValue TOCbase = N->getOperand(1); 4527 SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64, 4528 TOCbase, GA); 4529 4530 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA) || 4531 CModel == CodeModel::Large) { 4532 SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, 4533 SDValue(Tmp, 0)); 4534 transferMemOperands(N, MN); 4535 ReplaceNode(N, MN); 4536 return; 4537 } 4538 4539 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { 4540 const GlobalValue *GV = G->getGlobal(); 4541 unsigned char GVFlags = PPCSubTarget->classifyGlobalReference(GV); 4542 if (GVFlags & PPCII::MO_NLP_FLAG) { 4543 SDNode *MN = CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, 4544 SDValue(Tmp, 0)); 4545 transferMemOperands(N, MN); 4546 ReplaceNode(N, MN); 4547 return; 4548 } 4549 } 4550 4551 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, 4552 SDValue(Tmp, 0), GA)); 4553 return; 4554 } 4555 case PPCISD::PPC32_PICGOT: 4556 // Generate a PIC-safe GOT reference. 4557 assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() && 4558 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); 4559 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, 4560 PPCLowering->getPointerTy(CurDAG->getDataLayout()), 4561 MVT::i32); 4562 return; 4563 4564 case PPCISD::VADD_SPLAT: { 4565 // This expands into one of three sequences, depending on whether 4566 // the first operand is odd or even, positive or negative. 
4567 assert(isa<ConstantSDNode>(N->getOperand(0)) && 4568 isa<ConstantSDNode>(N->getOperand(1)) && 4569 "Invalid operand on VADD_SPLAT!"); 4570 4571 int Elt = N->getConstantOperandVal(0); 4572 int EltSize = N->getConstantOperandVal(1); 4573 unsigned Opc1, Opc2, Opc3; 4574 EVT VT; 4575 4576 if (EltSize == 1) { 4577 Opc1 = PPC::VSPLTISB; 4578 Opc2 = PPC::VADDUBM; 4579 Opc3 = PPC::VSUBUBM; 4580 VT = MVT::v16i8; 4581 } else if (EltSize == 2) { 4582 Opc1 = PPC::VSPLTISH; 4583 Opc2 = PPC::VADDUHM; 4584 Opc3 = PPC::VSUBUHM; 4585 VT = MVT::v8i16; 4586 } else { 4587 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); 4588 Opc1 = PPC::VSPLTISW; 4589 Opc2 = PPC::VADDUWM; 4590 Opc3 = PPC::VSUBUWM; 4591 VT = MVT::v4i32; 4592 } 4593 4594 if ((Elt & 1) == 0) { 4595 // Elt is even, in the range [-32,-18] + [16,30]. 4596 // 4597 // Convert: VADD_SPLAT elt, size 4598 // Into: tmp = VSPLTIS[BHW] elt 4599 // VADDU[BHW]M tmp, tmp 4600 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 4601 SDValue EltVal = getI32Imm(Elt >> 1, dl); 4602 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 4603 SDValue TmpVal = SDValue(Tmp, 0); 4604 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); 4605 return; 4606 } else if (Elt > 0) { 4607 // Elt is odd and positive, in the range [17,31]. 4608 // 4609 // Convert: VADD_SPLAT elt, size 4610 // Into: tmp1 = VSPLTIS[BHW] elt-16 4611 // tmp2 = VSPLTIS[BHW] -16 4612 // VSUBU[BHW]M tmp1, tmp2 4613 SDValue EltVal = getI32Imm(Elt - 16, dl); 4614 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 4615 EltVal = getI32Imm(-16, dl); 4616 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 4617 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), 4618 SDValue(Tmp2, 0))); 4619 return; 4620 } else { 4621 // Elt is odd and negative, in the range [-31,-17]. 
4622 // 4623 // Convert: VADD_SPLAT elt, size 4624 // Into: tmp1 = VSPLTIS[BHW] elt+16 4625 // tmp2 = VSPLTIS[BHW] -16 4626 // VADDU[BHW]M tmp1, tmp2 4627 SDValue EltVal = getI32Imm(Elt + 16, dl); 4628 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 4629 EltVal = getI32Imm(-16, dl); 4630 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 4631 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), 4632 SDValue(Tmp2, 0))); 4633 return; 4634 } 4635 } 4636 } 4637 4638 SelectCode(N); 4639 } 4640 4641 // If the target supports the cmpb instruction, do the idiom recognition here. 4642 // We don't do this as a DAG combine because we don't want to do it as nodes 4643 // are being combined (because we might miss part of the eventual idiom). We 4644 // don't want to do it during instruction selection because we want to reuse 4645 // the logic for lowering the masking operations already part of the 4646 // instruction selector. 4647 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { 4648 SDLoc dl(N); 4649 4650 assert(N->getOpcode() == ISD::OR && 4651 "Only OR nodes are supported for CMPB"); 4652 4653 SDValue Res; 4654 if (!PPCSubTarget->hasCMPB()) 4655 return Res; 4656 4657 if (N->getValueType(0) != MVT::i32 && 4658 N->getValueType(0) != MVT::i64) 4659 return Res; 4660 4661 EVT VT = N->getValueType(0); 4662 4663 SDValue RHS, LHS; 4664 bool BytesFound[8] = {false, false, false, false, false, false, false, false}; 4665 uint64_t Mask = 0, Alt = 0; 4666 4667 auto IsByteSelectCC = [this](SDValue O, unsigned &b, 4668 uint64_t &Mask, uint64_t &Alt, 4669 SDValue &LHS, SDValue &RHS) { 4670 if (O.getOpcode() != ISD::SELECT_CC) 4671 return false; 4672 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); 4673 4674 if (!isa<ConstantSDNode>(O.getOperand(2)) || 4675 !isa<ConstantSDNode>(O.getOperand(3))) 4676 return false; 4677 4678 uint64_t PM = O.getConstantOperandVal(2); 4679 uint64_t PAlt = O.getConstantOperandVal(3); 4680 for (b = 0; b 
< 8; ++b) { 4681 uint64_t Mask = UINT64_C(0xFF) << (8*b); 4682 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) 4683 break; 4684 } 4685 4686 if (b == 8) 4687 return false; 4688 Mask |= PM; 4689 Alt |= PAlt; 4690 4691 if (!isa<ConstantSDNode>(O.getOperand(1)) || 4692 O.getConstantOperandVal(1) != 0) { 4693 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); 4694 if (Op0.getOpcode() == ISD::TRUNCATE) 4695 Op0 = Op0.getOperand(0); 4696 if (Op1.getOpcode() == ISD::TRUNCATE) 4697 Op1 = Op1.getOperand(0); 4698 4699 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && 4700 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && 4701 isa<ConstantSDNode>(Op0.getOperand(1))) { 4702 4703 unsigned Bits = Op0.getValueSizeInBits(); 4704 if (b != Bits/8-1) 4705 return false; 4706 if (Op0.getConstantOperandVal(1) != Bits-8) 4707 return false; 4708 4709 LHS = Op0.getOperand(0); 4710 RHS = Op1.getOperand(0); 4711 return true; 4712 } 4713 4714 // When we have small integers (i16 to be specific), the form present 4715 // post-legalization uses SETULT in the SELECT_CC for the 4716 // higher-order byte, depending on the fact that the 4717 // even-higher-order bytes are known to all be zero, for example: 4718 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult 4719 // (so when the second byte is the same, because all higher-order 4720 // bits from bytes 3 and 4 are known to be zero, the result of the 4721 // xor can be at most 255) 4722 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && 4723 isa<ConstantSDNode>(O.getOperand(1))) { 4724 4725 uint64_t ULim = O.getConstantOperandVal(1); 4726 if (ULim != (UINT64_C(1) << b*8)) 4727 return false; 4728 4729 // Now we need to make sure that the upper bytes are known to be 4730 // zero. 
4731 unsigned Bits = Op0.getValueSizeInBits(); 4732 if (!CurDAG->MaskedValueIsZero( 4733 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) 4734 return false; 4735 4736 LHS = Op0.getOperand(0); 4737 RHS = Op0.getOperand(1); 4738 return true; 4739 } 4740 4741 return false; 4742 } 4743 4744 if (CC != ISD::SETEQ) 4745 return false; 4746 4747 SDValue Op = O.getOperand(0); 4748 if (Op.getOpcode() == ISD::AND) { 4749 if (!isa<ConstantSDNode>(Op.getOperand(1))) 4750 return false; 4751 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) 4752 return false; 4753 4754 SDValue XOR = Op.getOperand(0); 4755 if (XOR.getOpcode() == ISD::TRUNCATE) 4756 XOR = XOR.getOperand(0); 4757 if (XOR.getOpcode() != ISD::XOR) 4758 return false; 4759 4760 LHS = XOR.getOperand(0); 4761 RHS = XOR.getOperand(1); 4762 return true; 4763 } else if (Op.getOpcode() == ISD::SRL) { 4764 if (!isa<ConstantSDNode>(Op.getOperand(1))) 4765 return false; 4766 unsigned Bits = Op.getValueSizeInBits(); 4767 if (b != Bits/8-1) 4768 return false; 4769 if (Op.getConstantOperandVal(1) != Bits-8) 4770 return false; 4771 4772 SDValue XOR = Op.getOperand(0); 4773 if (XOR.getOpcode() == ISD::TRUNCATE) 4774 XOR = XOR.getOperand(0); 4775 if (XOR.getOpcode() != ISD::XOR) 4776 return false; 4777 4778 LHS = XOR.getOperand(0); 4779 RHS = XOR.getOperand(1); 4780 return true; 4781 } 4782 4783 return false; 4784 }; 4785 4786 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); 4787 while (!Queue.empty()) { 4788 SDValue V = Queue.pop_back_val(); 4789 4790 for (const SDValue &O : V.getNode()->ops()) { 4791 unsigned b; 4792 uint64_t M = 0, A = 0; 4793 SDValue OLHS, ORHS; 4794 if (O.getOpcode() == ISD::OR) { 4795 Queue.push_back(O); 4796 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { 4797 if (!LHS) { 4798 LHS = OLHS; 4799 RHS = ORHS; 4800 BytesFound[b] = true; 4801 Mask |= M; 4802 Alt |= A; 4803 } else if ((LHS == ORHS && RHS == OLHS) || 4804 (RHS == ORHS && LHS == OLHS)) { 4805 BytesFound[b] = true; 4806 Mask |= M; 
4807 Alt |= A; 4808 } else { 4809 return Res; 4810 } 4811 } else { 4812 return Res; 4813 } 4814 } 4815 } 4816 4817 unsigned LastB = 0, BCnt = 0; 4818 for (unsigned i = 0; i < 8; ++i) 4819 if (BytesFound[LastB]) { 4820 ++BCnt; 4821 LastB = i; 4822 } 4823 4824 if (!LastB || BCnt < 2) 4825 return Res; 4826 4827 // Because we'll be zero-extending the output anyway if don't have a specific 4828 // value for each input byte (via the Mask), we can 'anyext' the inputs. 4829 if (LHS.getValueType() != VT) { 4830 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); 4831 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); 4832 } 4833 4834 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); 4835 4836 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); 4837 if (NonTrivialMask && !Alt) { 4838 // Res = Mask & CMPB 4839 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 4840 CurDAG->getConstant(Mask, dl, VT)); 4841 } else if (Alt) { 4842 // Res = (CMPB & Mask) | (~CMPB & Alt) 4843 // Which, as suggested here: 4844 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 4845 // can be written as: 4846 // Res = Alt ^ ((Alt ^ Mask) & CMPB) 4847 // useful because the (Alt ^ Mask) can be pre-computed. 4848 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 4849 CurDAG->getConstant(Mask ^ Alt, dl, VT)); 4850 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, 4851 CurDAG->getConstant(Alt, dl, VT)); 4852 } 4853 4854 return Res; 4855 } 4856 4857 // When CR bit registers are enabled, an extension of an i1 variable to a i32 4858 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus 4859 // involves constant materialization of a 0 or a 1 or both. If the result of 4860 // the extension is then operated upon by some operator that can be constant 4861 // folded with a constant 0 or 1, and that constant can be materialized using 4862 // only one instruction (like a zero or one), then we should fold in those 4863 // operations with the select. 
// Attempt to fold the chain of single-use operators hanging off an i1->iN
// extension into the SELECT that will lower it. On success, Res holds the
// replacement for the final folded user, and N is updated (by reference) to
// point at that user so the caller replaces the right node.
void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
  // Only applicable when i1 values live in CR bits (see the comment above).
  if (!PPCSubTarget->useCRBits())
    return;

  // Only extensions of an i1 operand are lowered via SELECT_I[48].
  if (N->getOpcode() != ISD::ZERO_EXTEND &&
      N->getOpcode() != ISD::SIGN_EXTEND &&
      N->getOpcode() != ISD::ANY_EXTEND)
    return;

  if (N->getOperand(0).getValueType() != MVT::i1)
    return;

  // A shared extension can't be folded into a single user.
  if (!N->hasOneUse())
    return;

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Cond = N->getOperand(0);
  // sext(i1) produces -1/0; zext/aext produce 1/0.
  SDValue ConstTrue =
    CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
  SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);

  // Greedily fold each single user in turn: constant-fold the user with the
  // select's true and false values, and if both fold to materializable
  // constants, replace the user with a new select on the same condition.
  do {
    SDNode *User = *N->use_begin();
    // Only binary operators can be constant-folded below.
    if (User->getNumOperands() != 2)
      break;

    // Constant-fold User with Val substituted for N's result.
    auto TryFold = [this, N, User, dl](SDValue Val) {
      SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
      SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
      SDValue O1 = UserO1.getNode() == N ? Val : UserO1;

      return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
                                            User->getValueType(0),
                                            O0.getNode(), O1.getNode());
    };

    // FIXME: When the semantics of the interaction between select and undef
    // are clearly defined, it may turn out to be unnecessary to break here.
    SDValue TrueRes = TryFold(ConstTrue);
    if (!TrueRes || TrueRes.isUndef())
      break;
    SDValue FalseRes = TryFold(ConstFalse);
    if (!FalseRes || FalseRes.isUndef())
      break;

    // For us to materialize these using one instruction, we must be able to
    // represent them as signed 16-bit integers.
    uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(),
             False = cast<ConstantSDNode>(FalseRes)->getZExtValue();
    if (!isInt<16>(True) || !isInt<16>(False))
      break;

    // We can replace User with a new SELECT node, and try again to see if we
    // can fold the select with its user.
    Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
    N = User;
    ConstTrue = TrueRes;
    ConstFalse = FalseRes;
  } while (N->hasOneUse());
}

// Pre-selection DAG cleanup: recognize the CMPB idiom and fold boolean
// extensions into their users, replacing matched nodes in place.
void PPCDAGToDAGISel::PreprocessISelDAG() {
  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
  ++Position;

  bool MadeChange = false;
  // Walk the node list backwards from the root so replacements don't
  // invalidate the remaining iteration range.
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    if (N->use_empty())
      continue;

    SDValue Res;
    switch (N->getOpcode()) {
    default: break;
    case ISD::OR:
      Res = combineToCMPB(N);
      break;
    }

    // foldBoolExts may reseat N to the folded user and set Res.
    if (!Res)
      foldBoolExts(Res, N);

    if (Res) {
      DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
      DEBUG(N->dump(CurDAG));
      DEBUG(dbgs() << "\nNew: ");
      DEBUG(Res.getNode()->dump(CurDAG));
      DEBUG(dbgs() << "\n");

      CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
      MadeChange = true;
    }
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

/// PostprocessISelDAG - Perform some late peephole optimizations
/// on the DAG representation.
void PPCDAGToDAGISel::PostprocessISelDAG() {
  // Skip peepholes at -O0.
  if (TM.getOptLevel() == CodeGenOpt::None)
    return;

  PeepholePPC64();
  PeepholeCROps();
  PeepholePPC64ZExt();
}

// Check if all users of this node will become isel where the second operand
// is the constant zero. If this is so, and if we can negate the condition,
// then we can flip the true and false operands. This will allow the zero to
// be folded with the isel so that we don't need to materialize a register
// containing zero.
4980 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { 4981 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4982 UI != UE; ++UI) { 4983 SDNode *User = *UI; 4984 if (!User->isMachineOpcode()) 4985 return false; 4986 if (User->getMachineOpcode() != PPC::SELECT_I4 && 4987 User->getMachineOpcode() != PPC::SELECT_I8) 4988 return false; 4989 4990 SDNode *Op2 = User->getOperand(2).getNode(); 4991 if (!Op2->isMachineOpcode()) 4992 return false; 4993 4994 if (Op2->getMachineOpcode() != PPC::LI && 4995 Op2->getMachineOpcode() != PPC::LI8) 4996 return false; 4997 4998 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0)); 4999 if (!C) 5000 return false; 5001 5002 if (!C->isNullValue()) 5003 return false; 5004 } 5005 5006 return true; 5007 } 5008 5009 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { 5010 SmallVector<SDNode *, 4> ToReplace; 5011 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5012 UI != UE; ++UI) { 5013 SDNode *User = *UI; 5014 assert((User->getMachineOpcode() == PPC::SELECT_I4 || 5015 User->getMachineOpcode() == PPC::SELECT_I8) && 5016 "Must have all select users"); 5017 ToReplace.push_back(User); 5018 } 5019 5020 for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), 5021 UE = ToReplace.end(); UI != UE; ++UI) { 5022 SDNode *User = *UI; 5023 SDNode *ResNode = 5024 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), 5025 User->getValueType(0), User->getOperand(0), 5026 User->getOperand(2), 5027 User->getOperand(1)); 5028 5029 DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 5030 DEBUG(User->dump(CurDAG)); 5031 DEBUG(dbgs() << "\nNew: "); 5032 DEBUG(ResNode->dump(CurDAG)); 5033 DEBUG(dbgs() << "\n"); 5034 5035 ReplaceUses(User, ResNode); 5036 } 5037 } 5038 5039 void PPCDAGToDAGISel::PeepholeCROps() { 5040 bool IsModified; 5041 do { 5042 IsModified = false; 5043 for (SDNode &Node : CurDAG->allnodes()) { 5044 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 5045 if 
(!MachineNode || MachineNode->use_empty()) 5046 continue; 5047 SDNode *ResNode = MachineNode; 5048 5049 bool Op1Set = false, Op1Unset = false, 5050 Op1Not = false, 5051 Op2Set = false, Op2Unset = false, 5052 Op2Not = false; 5053 5054 unsigned Opcode = MachineNode->getMachineOpcode(); 5055 switch (Opcode) { 5056 default: break; 5057 case PPC::CRAND: 5058 case PPC::CRNAND: 5059 case PPC::CROR: 5060 case PPC::CRXOR: 5061 case PPC::CRNOR: 5062 case PPC::CREQV: 5063 case PPC::CRANDC: 5064 case PPC::CRORC: { 5065 SDValue Op = MachineNode->getOperand(1); 5066 if (Op.isMachineOpcode()) { 5067 if (Op.getMachineOpcode() == PPC::CRSET) 5068 Op2Set = true; 5069 else if (Op.getMachineOpcode() == PPC::CRUNSET) 5070 Op2Unset = true; 5071 else if (Op.getMachineOpcode() == PPC::CRNOR && 5072 Op.getOperand(0) == Op.getOperand(1)) 5073 Op2Not = true; 5074 } 5075 LLVM_FALLTHROUGH; 5076 } 5077 case PPC::BC: 5078 case PPC::BCn: 5079 case PPC::SELECT_I4: 5080 case PPC::SELECT_I8: 5081 case PPC::SELECT_F4: 5082 case PPC::SELECT_F8: 5083 case PPC::SELECT_QFRC: 5084 case PPC::SELECT_QSRC: 5085 case PPC::SELECT_QBRC: 5086 case PPC::SELECT_VRRC: 5087 case PPC::SELECT_VSFRC: 5088 case PPC::SELECT_VSSRC: 5089 case PPC::SELECT_VSRC: { 5090 SDValue Op = MachineNode->getOperand(0); 5091 if (Op.isMachineOpcode()) { 5092 if (Op.getMachineOpcode() == PPC::CRSET) 5093 Op1Set = true; 5094 else if (Op.getMachineOpcode() == PPC::CRUNSET) 5095 Op1Unset = true; 5096 else if (Op.getMachineOpcode() == PPC::CRNOR && 5097 Op.getOperand(0) == Op.getOperand(1)) 5098 Op1Not = true; 5099 } 5100 } 5101 break; 5102 } 5103 5104 bool SelectSwap = false; 5105 switch (Opcode) { 5106 default: break; 5107 case PPC::CRAND: 5108 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5109 // x & x = x 5110 ResNode = MachineNode->getOperand(0).getNode(); 5111 else if (Op1Set) 5112 // 1 & y = y 5113 ResNode = MachineNode->getOperand(1).getNode(); 5114 else if (Op2Set) 5115 // x & 1 = x 5116 ResNode = 
MachineNode->getOperand(0).getNode(); 5117 else if (Op1Unset || Op2Unset) 5118 // x & 0 = 0 & y = 0 5119 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 5120 MVT::i1); 5121 else if (Op1Not) 5122 // ~x & y = andc(y, x) 5123 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 5124 MVT::i1, MachineNode->getOperand(1), 5125 MachineNode->getOperand(0). 5126 getOperand(0)); 5127 else if (Op2Not) 5128 // x & ~y = andc(x, y) 5129 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 5130 MVT::i1, MachineNode->getOperand(0), 5131 MachineNode->getOperand(1). 5132 getOperand(0)); 5133 else if (AllUsersSelectZero(MachineNode)) { 5134 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 5135 MVT::i1, MachineNode->getOperand(0), 5136 MachineNode->getOperand(1)); 5137 SelectSwap = true; 5138 } 5139 break; 5140 case PPC::CRNAND: 5141 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5142 // nand(x, x) -> nor(x, x) 5143 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5144 MVT::i1, MachineNode->getOperand(0), 5145 MachineNode->getOperand(0)); 5146 else if (Op1Set) 5147 // nand(1, y) -> nor(y, y) 5148 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5149 MVT::i1, MachineNode->getOperand(1), 5150 MachineNode->getOperand(1)); 5151 else if (Op2Set) 5152 // nand(x, 1) -> nor(x, x) 5153 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5154 MVT::i1, MachineNode->getOperand(0), 5155 MachineNode->getOperand(0)); 5156 else if (Op1Unset || Op2Unset) 5157 // nand(x, 0) = nand(0, y) = 1 5158 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 5159 MVT::i1); 5160 else if (Op1Not) 5161 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) 5162 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5163 MVT::i1, MachineNode->getOperand(0). 
5164 getOperand(0), 5165 MachineNode->getOperand(1)); 5166 else if (Op2Not) 5167 // nand(x, ~y) = ~x | y = orc(y, x) 5168 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5169 MVT::i1, MachineNode->getOperand(1). 5170 getOperand(0), 5171 MachineNode->getOperand(0)); 5172 else if (AllUsersSelectZero(MachineNode)) { 5173 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 5174 MVT::i1, MachineNode->getOperand(0), 5175 MachineNode->getOperand(1)); 5176 SelectSwap = true; 5177 } 5178 break; 5179 case PPC::CROR: 5180 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5181 // x | x = x 5182 ResNode = MachineNode->getOperand(0).getNode(); 5183 else if (Op1Set || Op2Set) 5184 // x | 1 = 1 | y = 1 5185 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 5186 MVT::i1); 5187 else if (Op1Unset) 5188 // 0 | y = y 5189 ResNode = MachineNode->getOperand(1).getNode(); 5190 else if (Op2Unset) 5191 // x | 0 = x 5192 ResNode = MachineNode->getOperand(0).getNode(); 5193 else if (Op1Not) 5194 // ~x | y = orc(y, x) 5195 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5196 MVT::i1, MachineNode->getOperand(1), 5197 MachineNode->getOperand(0). 5198 getOperand(0)); 5199 else if (Op2Not) 5200 // x | ~y = orc(x, y) 5201 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5202 MVT::i1, MachineNode->getOperand(0), 5203 MachineNode->getOperand(1). 
5204 getOperand(0)); 5205 else if (AllUsersSelectZero(MachineNode)) { 5206 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5207 MVT::i1, MachineNode->getOperand(0), 5208 MachineNode->getOperand(1)); 5209 SelectSwap = true; 5210 } 5211 break; 5212 case PPC::CRXOR: 5213 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5214 // xor(x, x) = 0 5215 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 5216 MVT::i1); 5217 else if (Op1Set) 5218 // xor(1, y) -> nor(y, y) 5219 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5220 MVT::i1, MachineNode->getOperand(1), 5221 MachineNode->getOperand(1)); 5222 else if (Op2Set) 5223 // xor(x, 1) -> nor(x, x) 5224 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5225 MVT::i1, MachineNode->getOperand(0), 5226 MachineNode->getOperand(0)); 5227 else if (Op1Unset) 5228 // xor(0, y) = y 5229 ResNode = MachineNode->getOperand(1).getNode(); 5230 else if (Op2Unset) 5231 // xor(x, 0) = x 5232 ResNode = MachineNode->getOperand(0).getNode(); 5233 else if (Op1Not) 5234 // xor(~x, y) = eqv(x, y) 5235 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 5236 MVT::i1, MachineNode->getOperand(0). 5237 getOperand(0), 5238 MachineNode->getOperand(1)); 5239 else if (Op2Not) 5240 // xor(x, ~y) = eqv(x, y) 5241 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 5242 MVT::i1, MachineNode->getOperand(0), 5243 MachineNode->getOperand(1). 
5244 getOperand(0)); 5245 else if (AllUsersSelectZero(MachineNode)) { 5246 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 5247 MVT::i1, MachineNode->getOperand(0), 5248 MachineNode->getOperand(1)); 5249 SelectSwap = true; 5250 } 5251 break; 5252 case PPC::CRNOR: 5253 if (Op1Set || Op2Set) 5254 // nor(1, y) -> 0 5255 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 5256 MVT::i1); 5257 else if (Op1Unset) 5258 // nor(0, y) = ~y -> nor(y, y) 5259 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5260 MVT::i1, MachineNode->getOperand(1), 5261 MachineNode->getOperand(1)); 5262 else if (Op2Unset) 5263 // nor(x, 0) = ~x 5264 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5265 MVT::i1, MachineNode->getOperand(0), 5266 MachineNode->getOperand(0)); 5267 else if (Op1Not) 5268 // nor(~x, y) = andc(x, y) 5269 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 5270 MVT::i1, MachineNode->getOperand(0). 5271 getOperand(0), 5272 MachineNode->getOperand(1)); 5273 else if (Op2Not) 5274 // nor(x, ~y) = andc(y, x) 5275 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 5276 MVT::i1, MachineNode->getOperand(1). 
5277 getOperand(0), 5278 MachineNode->getOperand(0)); 5279 else if (AllUsersSelectZero(MachineNode)) { 5280 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 5281 MVT::i1, MachineNode->getOperand(0), 5282 MachineNode->getOperand(1)); 5283 SelectSwap = true; 5284 } 5285 break; 5286 case PPC::CREQV: 5287 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5288 // eqv(x, x) = 1 5289 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 5290 MVT::i1); 5291 else if (Op1Set) 5292 // eqv(1, y) = y 5293 ResNode = MachineNode->getOperand(1).getNode(); 5294 else if (Op2Set) 5295 // eqv(x, 1) = x 5296 ResNode = MachineNode->getOperand(0).getNode(); 5297 else if (Op1Unset) 5298 // eqv(0, y) = ~y -> nor(y, y) 5299 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5300 MVT::i1, MachineNode->getOperand(1), 5301 MachineNode->getOperand(1)); 5302 else if (Op2Unset) 5303 // eqv(x, 0) = ~x 5304 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5305 MVT::i1, MachineNode->getOperand(0), 5306 MachineNode->getOperand(0)); 5307 else if (Op1Not) 5308 // eqv(~x, y) = xor(x, y) 5309 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 5310 MVT::i1, MachineNode->getOperand(0). 5311 getOperand(0), 5312 MachineNode->getOperand(1)); 5313 else if (Op2Not) 5314 // eqv(x, ~y) = xor(x, y) 5315 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 5316 MVT::i1, MachineNode->getOperand(0), 5317 MachineNode->getOperand(1). 
5318 getOperand(0)); 5319 else if (AllUsersSelectZero(MachineNode)) { 5320 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 5321 MVT::i1, MachineNode->getOperand(0), 5322 MachineNode->getOperand(1)); 5323 SelectSwap = true; 5324 } 5325 break; 5326 case PPC::CRANDC: 5327 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5328 // andc(x, x) = 0 5329 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 5330 MVT::i1); 5331 else if (Op1Set) 5332 // andc(1, y) = ~y 5333 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5334 MVT::i1, MachineNode->getOperand(1), 5335 MachineNode->getOperand(1)); 5336 else if (Op1Unset || Op2Set) 5337 // andc(0, y) = andc(x, 1) = 0 5338 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 5339 MVT::i1); 5340 else if (Op2Unset) 5341 // andc(x, 0) = x 5342 ResNode = MachineNode->getOperand(0).getNode(); 5343 else if (Op1Not) 5344 // andc(~x, y) = ~(x | y) = nor(x, y) 5345 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5346 MVT::i1, MachineNode->getOperand(0). 5347 getOperand(0), 5348 MachineNode->getOperand(1)); 5349 else if (Op2Not) 5350 // andc(x, ~y) = x & y 5351 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 5352 MVT::i1, MachineNode->getOperand(0), 5353 MachineNode->getOperand(1). 
5354 getOperand(0)); 5355 else if (AllUsersSelectZero(MachineNode)) { 5356 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5357 MVT::i1, MachineNode->getOperand(1), 5358 MachineNode->getOperand(0)); 5359 SelectSwap = true; 5360 } 5361 break; 5362 case PPC::CRORC: 5363 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5364 // orc(x, x) = 1 5365 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 5366 MVT::i1); 5367 else if (Op1Set || Op2Unset) 5368 // orc(1, y) = orc(x, 0) = 1 5369 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 5370 MVT::i1); 5371 else if (Op2Set) 5372 // orc(x, 1) = x 5373 ResNode = MachineNode->getOperand(0).getNode(); 5374 else if (Op1Unset) 5375 // orc(0, y) = ~y 5376 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5377 MVT::i1, MachineNode->getOperand(1), 5378 MachineNode->getOperand(1)); 5379 else if (Op1Not) 5380 // orc(~x, y) = ~(x & y) = nand(x, y) 5381 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 5382 MVT::i1, MachineNode->getOperand(0). 5383 getOperand(0), 5384 MachineNode->getOperand(1)); 5385 else if (Op2Not) 5386 // orc(x, ~y) = x | y 5387 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 5388 MVT::i1, MachineNode->getOperand(0), 5389 MachineNode->getOperand(1). 
5390 getOperand(0)); 5391 else if (AllUsersSelectZero(MachineNode)) { 5392 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 5393 MVT::i1, MachineNode->getOperand(1), 5394 MachineNode->getOperand(0)); 5395 SelectSwap = true; 5396 } 5397 break; 5398 case PPC::SELECT_I4: 5399 case PPC::SELECT_I8: 5400 case PPC::SELECT_F4: 5401 case PPC::SELECT_F8: 5402 case PPC::SELECT_QFRC: 5403 case PPC::SELECT_QSRC: 5404 case PPC::SELECT_QBRC: 5405 case PPC::SELECT_VRRC: 5406 case PPC::SELECT_VSFRC: 5407 case PPC::SELECT_VSSRC: 5408 case PPC::SELECT_VSRC: 5409 if (Op1Set) 5410 ResNode = MachineNode->getOperand(1).getNode(); 5411 else if (Op1Unset) 5412 ResNode = MachineNode->getOperand(2).getNode(); 5413 else if (Op1Not) 5414 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), 5415 SDLoc(MachineNode), 5416 MachineNode->getValueType(0), 5417 MachineNode->getOperand(0). 5418 getOperand(0), 5419 MachineNode->getOperand(2), 5420 MachineNode->getOperand(1)); 5421 break; 5422 case PPC::BC: 5423 case PPC::BCn: 5424 if (Op1Not) 5425 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : 5426 PPC::BC, 5427 SDLoc(MachineNode), 5428 MVT::Other, 5429 MachineNode->getOperand(0). 5430 getOperand(0), 5431 MachineNode->getOperand(1), 5432 MachineNode->getOperand(2)); 5433 // FIXME: Handle Op1Set, Op1Unset here too. 5434 break; 5435 } 5436 5437 // If we're inverting this node because it is used only by selects that 5438 // we'd like to swap, then swap the selects before the node replacement. 
5439 if (SelectSwap) 5440 SwapAllSelectUsers(MachineNode); 5441 5442 if (ResNode != MachineNode) { 5443 DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 5444 DEBUG(MachineNode->dump(CurDAG)); 5445 DEBUG(dbgs() << "\nNew: "); 5446 DEBUG(ResNode->dump(CurDAG)); 5447 DEBUG(dbgs() << "\n"); 5448 5449 ReplaceUses(MachineNode, ResNode); 5450 IsModified = true; 5451 } 5452 } 5453 if (IsModified) 5454 CurDAG->RemoveDeadNodes(); 5455 } while (IsModified); 5456 } 5457 5458 // Gather the set of 32-bit operations that are known to have their 5459 // higher-order 32 bits zero, where ToPromote contains all such operations. 5460 static bool PeepholePPC64ZExtGather(SDValue Op32, 5461 SmallPtrSetImpl<SDNode *> &ToPromote) { 5462 if (!Op32.isMachineOpcode()) 5463 return false; 5464 5465 // First, check for the "frontier" instructions (those that will clear the 5466 // higher-order 32 bits. 5467 5468 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap 5469 // around. If it does not, then these instructions will clear the 5470 // higher-order bits. 5471 if ((Op32.getMachineOpcode() == PPC::RLWINM || 5472 Op32.getMachineOpcode() == PPC::RLWNM) && 5473 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { 5474 ToPromote.insert(Op32.getNode()); 5475 return true; 5476 } 5477 5478 // SLW and SRW always clear the higher-order bits. 5479 if (Op32.getMachineOpcode() == PPC::SLW || 5480 Op32.getMachineOpcode() == PPC::SRW) { 5481 ToPromote.insert(Op32.getNode()); 5482 return true; 5483 } 5484 5485 // For LI and LIS, we need the immediate to be positive (so that it is not 5486 // sign extended). 5487 if (Op32.getMachineOpcode() == PPC::LI || 5488 Op32.getMachineOpcode() == PPC::LIS) { 5489 if (!isUInt<15>(Op32.getConstantOperandVal(0))) 5490 return false; 5491 5492 ToPromote.insert(Op32.getNode()); 5493 return true; 5494 } 5495 5496 // LHBRX and LWBRX always clear the higher-order bits. 
5497 if (Op32.getMachineOpcode() == PPC::LHBRX || 5498 Op32.getMachineOpcode() == PPC::LWBRX) { 5499 ToPromote.insert(Op32.getNode()); 5500 return true; 5501 } 5502 5503 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. 5504 if (Op32.getMachineOpcode() == PPC::CNTLZW || 5505 Op32.getMachineOpcode() == PPC::CNTTZW) { 5506 ToPromote.insert(Op32.getNode()); 5507 return true; 5508 } 5509 5510 // Next, check for those instructions we can look through. 5511 5512 // Assuming the mask does not wrap around, then the higher-order bits are 5513 // taken directly from the first operand. 5514 if (Op32.getMachineOpcode() == PPC::RLWIMI && 5515 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { 5516 SmallPtrSet<SDNode *, 16> ToPromote1; 5517 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) 5518 return false; 5519 5520 ToPromote.insert(Op32.getNode()); 5521 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 5522 return true; 5523 } 5524 5525 // For OR, the higher-order bits are zero if that is true for both operands. 5526 // For SELECT_I4, the same is true (but the relevant operand numbers are 5527 // shifted by 1). 5528 if (Op32.getMachineOpcode() == PPC::OR || 5529 Op32.getMachineOpcode() == PPC::SELECT_I4) { 5530 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0; 5531 SmallPtrSet<SDNode *, 16> ToPromote1; 5532 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) 5533 return false; 5534 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) 5535 return false; 5536 5537 ToPromote.insert(Op32.getNode()); 5538 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 5539 return true; 5540 } 5541 5542 // For ORI and ORIS, we need the higher-order bits of the first operand to be 5543 // zero, and also for the constant to be positive (so that it is not sign 5544 // extended). 
5545 if (Op32.getMachineOpcode() == PPC::ORI || 5546 Op32.getMachineOpcode() == PPC::ORIS) { 5547 SmallPtrSet<SDNode *, 16> ToPromote1; 5548 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) 5549 return false; 5550 if (!isUInt<15>(Op32.getConstantOperandVal(1))) 5551 return false; 5552 5553 ToPromote.insert(Op32.getNode()); 5554 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 5555 return true; 5556 } 5557 5558 // The higher-order bits of AND are zero if that is true for at least one of 5559 // the operands. 5560 if (Op32.getMachineOpcode() == PPC::AND) { 5561 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2; 5562 bool Op0OK = 5563 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1); 5564 bool Op1OK = 5565 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2); 5566 if (!Op0OK && !Op1OK) 5567 return false; 5568 5569 ToPromote.insert(Op32.getNode()); 5570 5571 if (Op0OK) 5572 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 5573 5574 if (Op1OK) 5575 ToPromote.insert(ToPromote2.begin(), ToPromote2.end()); 5576 5577 return true; 5578 } 5579 5580 // For ANDI and ANDIS, the higher-order bits are zero if either that is true 5581 // of the first operand, or if the second operand is positive (so that it is 5582 // not sign extended). 
// PeepholePPC64ZExt - Remove redundant i32 -> i64 zero extensions.
//
// When we zero-extend from i32 to i64, we use a pattern like this:
// def : Pat<(i64 (zext i32:$in)),
//           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
//                   0, 32)>;
// There are several 32-bit shift/rotate instructions, however, that will
// clear the higher-order bits of their output, rendering the RLDICL
// unnecessary. When that happens, we remove it here, and redefine the
// relevant 32-bit operation to be a 64-bit operation.
void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  // This pattern only exists on 64-bit targets.
  if (!PPCSubTarget->isPPC64())
    return;

  // Walk the DAG in reverse topological order, starting just past the root.
  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
  ++Position;

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // Match the RLDICL(INSERT_SUBREG(IMPLICIT_DEF, x, sub_32), 0, 32)
    // canonical zero-extension shape, bailing out at the first mismatch.
    if (N->getMachineOpcode() != PPC::RLDICL)
      continue;

    // RLDICL with shift 0 and mask-begin 32 is exactly "clear the upper
    // 32 bits"; any other operand values are not a plain zext.
    if (N->getConstantOperandVal(1) != 0 ||
        N->getConstantOperandVal(2) != 32)
      continue;

    SDValue ISR = N->getOperand(0);
    if (!ISR.isMachineOpcode() ||
        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
      continue;

    // The INSERT_SUBREG must feed only this RLDICL; otherwise removing the
    // pair would change other users.
    if (!ISR.hasOneUse())
      continue;

    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
      continue;

    SDValue IDef = ISR.getOperand(0);
    if (!IDef.isMachineOpcode() ||
        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
      continue;

    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
    // can get rid of it.

    SDValue Op32 = ISR->getOperand(1);
    if (!Op32.isMachineOpcode())
      continue;

    // There are some 32-bit instructions that always clear the high-order 32
    // bits, there are also some instructions (like AND) that we can look
    // through.
    SmallPtrSet<SDNode *, 16> ToPromote;
    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
      continue;

    // If the ToPromote set contains nodes that have uses outside of the set
    // (except for the original INSERT_SUBREG), then abort the transformation.
    // Promoted nodes produce i64 values, which would be the wrong type for
    // any i32 user outside the set.
    bool OutsideUse = false;
    for (SDNode *PN : ToPromote) {
      for (SDNode *UN : PN->uses()) {
        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
          OutsideUse = true;
          break;
        }
      }

      if (OutsideUse)
        break;
    }
    if (OutsideUse)
      continue;

    MadeChange = true;

    // We now know that this zero extension can be removed by promoting to
    // nodes in ToPromote to 64-bit operations, where for operations in the
    // frontier of the set, we need to insert INSERT_SUBREGs for their
    // operands.
    for (SDNode *PN : ToPromote) {
      // Map each 32-bit opcode to its 64-bit counterpart; the gather
      // routine above only accepts opcodes listed here.
      unsigned NewOpcode;
      switch (PN->getMachineOpcode()) {
      default:
        llvm_unreachable("Don't know the 64-bit variant of this instruction");
      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
      case PPC::LI:        NewOpcode = PPC::LI8; break;
      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
      case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
      case PPC::OR:        NewOpcode = PPC::OR8; break;
      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
      case PPC::AND:       NewOpcode = PPC::AND8; break;
      case PPC::ANDIo:     NewOpcode = PPC::ANDIo8; break;
      case PPC::ANDISo:    NewOpcode = PPC::ANDISo8; break;
      }

      // Note: During the replacement process, the nodes will be in an
      // inconsistent state (some instructions will have operands with values
      // of the wrong type). Once done, however, everything should be right
      // again.

      // Any i32 operand coming from outside the promoted set must itself be
      // wrapped in an INSERT_SUBREG so the promoted (now 64-bit) node gets a
      // 64-bit input. Constants are left alone (operand immediates).
      SmallVector<SDValue, 4> Ops;
      for (const SDValue &V : PN->ops()) {
        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
            !isa<ConstantSDNode>(V)) {
          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
          SDNode *ReplOp =
            CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
                                   ISR.getNode()->getVTList(), ReplOpOps);
          Ops.push_back(SDValue(ReplOp, 0));
        } else {
          Ops.push_back(V);
        }
      }

      // Because all to-be-promoted nodes only have users that are other
      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
      // the i32 result value type with i64.

      SmallVector<EVT, 2> NewVTs;
      SDVTList VTs = PN->getVTList();
      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
        if (VTs.VTs[i] == MVT::i32)
          NewVTs.push_back(MVT::i64);
        else
          NewVTs.push_back(VTs.VTs[i]);

      DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld:    ");
      DEBUG(PN->dump(CurDAG));

      // Morph the node in place (preserves its identity for its users).
      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

      DEBUG(dbgs() << "\nNew: ");
      DEBUG(PN->dump(CurDAG));
      DEBUG(dbgs() << "\n");
    }

    // Now we replace the original zero extend and its associated INSERT_SUBREG
    // with the value feeding the INSERT_SUBREG (which has now been promoted to
    // return an i64).

    DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld:    ");
    DEBUG(N->dump(CurDAG));
    DEBUG(dbgs() << "\nNew: ");
    DEBUG(Op32.getNode()->dump(CurDAG));
    DEBUG(dbgs() << "\n");

    ReplaceUses(N, Op32.getNode());
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}
// PeepholePPC64 - Fold an add-immediate feeding the address operand of a
// load or store into the memory operation's displacement, adjusting
// relocation target flags where required.
void PPCDAGToDAGISel::PeepholePPC64() {
  // These optimizations are currently supported only for 64-bit SVR4.
  if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
    return;

  // Walk the DAG in reverse topological order, starting just past the root.
  SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
  ++Position;

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    // FirstOp is the operand index of the displacement immediate: 0 for
    // loads, 1 for stores (whose operand 0 is the stored value).
    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();

    switch (StorageOpcode) {
    default: continue;

    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LD:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWA:
    case PPC::LWZ:
    case PPC::LWZ8:
      FirstOp = 0;
      break;

    case PPC::STB:
    case PPC::STB8:
    case PPC::STD:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      FirstOp = 1;
      break;
    }

    // If this is a load or store with a zero offset, or within the alignment,
    // we may be able to fold an add-immediate into the memory operation.
    // The check against alignment is below, as it can't occur until we check
    // the arguments to N.
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there. A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      // For these cases, the immediate may not be divisible by 4, in
      // which case the fold is illegal for DS-form instructions. (The
      // other cases provide aligned addresses and are always safe.)
      if ((StorageOpcode == PPC::LWA ||
           StorageOpcode == PPC::LD ||
           StorageOpcode == PPC::STD) &&
          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
           Base.getConstantOperandVal(1) % 4 != 0))
        continue;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL:
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
    int MaxDisplacement = 7;
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      // A larger known alignment of the global permits a larger offset
      // without needing a different @ha value.
      MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
    }

    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
        // one use, then we can do this for any offset, we just need to also
        // update the offset (i.e. the symbol addend) on the addis also.
        if (Base.getMachineOpcode() != PPC::ADDItocL)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        // Both halves must refer to the same symbol for a coherent
        // @ha/@lo pair after updating the addend.
        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        // DS-form displacements must be a multiple of 4.
        if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
             StorageOpcode == PPC::STD) && (Offset % 4) != 0)
          continue;

        // D/DS-form displacements are 16-bit signed immediates.
        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        continue;
      }
    }

    // We found an opportunity. Reverse the operands from the add
    // immediate and substitute them into the load or store. If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase:    ");
    DEBUG(Base->dump(CurDAG));
    DEBUG(dbgs() << "\nN: ");
    DEBUG(N->dump(CurDAG));
    DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (GV->getAlignment() < 4 &&
            (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
             StorageOpcode == PPC::LWA || (Offset % 4) != 0)) {
          DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
      } else if (ConstantPoolSDNode *CP =
                 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
                                                CP->getAlignment(),
                                                Offset, Flags);
      }
    }

    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
                                       Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
                                       N->getOperand(2));

    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
                                       ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}
5984 /// 5985 FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM, 5986 CodeGenOpt::Level OptLevel) { 5987 return new PPCDAGToDAGISel(TM, OptLevel); 5988 } 5989