//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines a pattern matching instruction selector for PowerPC,
// converting from a legalized dag to a PPC dag.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <new>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "ppc-codegen"

STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
                          cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
static cl::opt<bool> BPermRewriterNoMasking(
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

static cl::opt<bool> EnableBranchHint(
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

static cl::opt<bool> EnableTLSOpt(
    "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

enum ICmpInGPRType { ICGPR_All, ICGPR_None, ICGPR_I32, ICGPR_I64,
                     ICGPR_NonExtIn, ICGPR_Zext, ICGPR_Sext, ICGPR_ZextI32,
                     ICGPR_SextI32, ICGPR_ZextI64, ICGPR_SextI64 };

static cl::opt<ICmpInGPRType> CmpInGPR(
    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));
namespace {

//===--------------------------------------------------------------------===//
/// PPCDAGToDAGISel - PPC specific code to select PPC machine
/// instructions for SelectionDAG operations.
///
class PPCDAGToDAGISel : public SelectionDAGISel {
  const PPCTargetMachine &TM;
  const PPCSubtarget *PPCSubTarget = nullptr;
  const PPCTargetLowering *PPCLowering = nullptr;
  unsigned GlobalBaseReg = 0;

public:
  explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), TM(tm) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Make sure we re-emit a set of the global base reg if necessary
    GlobalBaseReg = 0;
    PPCSubTarget = &MF.getSubtarget<PPCSubtarget>();
    PPCLowering = PPCSubTarget->getTargetLowering();
    SelectionDAGISel::runOnMachineFunction(MF);

    if (!PPCSubTarget->isSVR4ABI())
      InsertVRSaveCode(MF);

    return true;
  }

  void PreprocessISelDAG() override;
  void PostprocessISelDAG() override;

  /// getI16Imm - Return a target constant with the specified value, of type
  /// i16.
  inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
  }

  /// getI32Imm - Return a target constant with the specified value, of type
  /// i32.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  /// getI64Imm - Return a target constant with the specified value, of type
  /// i64.
  inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  }

  /// getSmallIPtrImm - Return a target constant of pointer type.
  inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(
        Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
  }

  /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
  /// rotate and mask opcode and mask operation.
  static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                              unsigned &SH, unsigned &MB, unsigned &ME);

  /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
  /// base register. Return the virtual register that holds this value.
  SDNode *getGlobalBaseReg();

  void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

  // Select - Convert the specified operand from a target-independent to a
  // target-specific node if it hasn't already been changed.
  void Select(SDNode *N) override;

  bool tryBitfieldInsert(SDNode *N);
  bool tryBitPermutation(SDNode *N);
  bool tryIntCompareInGPR(SDNode *N);

  // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
  // an X-Form load instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormLoad(LoadSDNode *N);
  // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
  // an X-Form store instruction with the offset being a relocation coming from
  // the PPCISD::ADD_TLS.
  bool tryTLSXFormStore(StoreSDNode *N);
  /// SelectCC - Select a comparison of the specified values with the
  /// specified condition code, returning the CR# of the expression.
  SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                   const SDLoc &dl);

  /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
  /// immediate field. Note that the operand at this point is already the
  /// result of a prior SelectAddressRegImm call.
  bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
    if (N.getOpcode() == ISD::TargetConstant ||
        N.getOpcode() == ISD::TargetGlobalAddress) {
      Out = N;
      return true;
    }

    return false;
  }

  /// SelectAddrIdx - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is D.
  /// The last parameter \p 0 means the associated D form has no requirement
  /// for the 16-bit signed displacement.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 0);
  }

  /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DS.
  /// The last parameter \p 4 means the associated DS form's 16-bit signed
  /// displacement must be a multiple of 4.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 4);
  }

  /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
  /// represented as an indexed [r+r] operation.
  /// This is for xform instructions whose associated displacement form is DQ.
  /// The last parameter \p 16 means the associated DQ form's 16-bit signed
  /// displacement must be a multiple of 16.
  /// Returns false if it can be represented by [r+imm], which are preferred.
  bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, 16);
  }

  /// SelectAddrIdxOnly - Given the specified address, force it to be
  /// represented as an indexed [r+r] operation.
  bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
    return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
  }

  /// SelectAddrImm - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement [r+imm].
  /// The last parameter \p 0 means the D form has no requirement for the
  /// 16-bit signed displacement.
  bool SelectAddrImm(SDValue N, SDValue &Disp,
                     SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 0);
  }

  /// SelectAddrImmX4 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 4 (last parameter). Suitable for use by STD and friends.
  bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 4);
  }

  /// SelectAddrImmX16 - Returns true if the address N can be represented by
  /// a base register plus a signed 16-bit displacement that is a multiple of
  /// 16 (last parameter). Suitable for use by STXV and friends.
  bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
    return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, 16);
  }

  // Select an address into a single register.
  bool SelectAddr(SDValue N, SDValue &Base) {
    Base = N;
    return true;
  }

  bool SelectAddrPCRel(SDValue N, SDValue &Base) {
    return PPCLowering->SelectAddressPCRel(N, Base);
  }

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions. It is always correct to compute the value into
  /// a register. The case of adding a (possibly relocatable) constant to a
  /// register can be improved, but it is wrong to substitute Reg+Reg for
  /// Reg in an asm, because the load or store opcode would have to change.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override {
    switch(ConstraintID) {
    default:
      errs() << "ConstraintID: " << ConstraintID << "\n";
      llvm_unreachable("Unexpected asm memory constraint");
    case InlineAsm::Constraint_es:
    case InlineAsm::Constraint_m:
    case InlineAsm::Constraint_o:
    case InlineAsm::Constraint_Q:
    case InlineAsm::Constraint_Z:
    case InlineAsm::Constraint_Zy:
      // We need to make sure that this one operand does not end up in r0
      // (because we might end up lowering this as 0(%op)).
      const TargetRegisterInfo *TRI = PPCSubTarget->getRegisterInfo();
      const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
      SDLoc dl(Op);
      SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
      SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);

      OutOps.push_back(NewOp);
      return false;
    }
    return true;
  }

  void InsertVRSaveCode(MachineFunction &MF);

  StringRef getPassName() const override {
    return "PowerPC DAG->DAG Pattern Instruction Selection";
  }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
  bool trySETCC(SDNode *N);
  bool tryAsSingleRLDICL(SDNode *N);
  bool tryAsSingleRLDICR(SDNode *N);
  bool tryAsSingleRLWINM(SDNode *N);
  bool tryAsSingleRLWINM8(SDNode *N);
  bool tryAsSingleRLWIMI(SDNode *N);
  bool tryAsPairOfRLDICL(SDNode *N);
  bool tryAsSingleRLDIMI(SDNode *N);

  void PeepholePPC64();
  void PeepholePPC64ZExt();
  void PeepholeCROps();

  SDValue combineToCMPB(SDNode *N);
  void foldBoolExts(SDValue &Res, SDNode *&N);

  bool AllUsersSelectZero(SDNode *N);
  void SwapAllSelectUsers(SDNode *N);

  bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
  void transferMemOperands(SDNode *N, SDNode *Result);
};

} // end anonymous namespace

/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.
void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
  // Check to see if this function uses vector registers, which means we have to
  // save and restore the VRSAVE register and update it with the regs we use.
  //
  // In this case, there will be virtual registers of vector type created
  // by the scheduler. Detect them now.
  bool HasVectorVReg = false;
  for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = Register::index2VirtReg(i);
    if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
      HasVectorVReg = true;
      break;
    }
  }
  if (!HasVectorVReg) return; // nothing to do.

  // If we have a vector register, we want to emit code into the entry and exit
  // blocks to save and restore the VRSAVE register. We do this here (instead
  // of marking all vector instructions as clobbering VRSAVE) for two reasons:
  //
  // 1. This (trivially) reduces the load on the register allocator, by not
  //    having to represent the live range of the VRSAVE register.
  // 2. This (more significantly) allows us to create a temporary virtual
  //    register to hold the saved VRSAVE value, allowing this temporary to be
  //    register allocated, instead of forcing it to be spilled to the stack.

  // Create two vregs - one to hold the VRSAVE register that is live-in to the
  // function and one for the value after having bits or'd into it.
  Register InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
  Register UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);

  const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
  MachineBasicBlock &EntryBB = *Fn.begin();
  DebugLoc dl;
  // Emit the following code into the entry block:
  // InVRSAVE = MFVRSAVE
  // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
  // MTVRSAVE UpdatedVRSAVE
  MachineBasicBlock::iterator IP = EntryBB.begin();  // Insert Point
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
          UpdatedVRSAVE).addReg(InVRSAVE);
  BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);

  // Find all return blocks, outputting a restore in each epilog.
  for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
    if (BB->isReturnBlock()) {
      IP = BB->end(); --IP;

      // Skip over all terminator instructions, which are part of the return
      // sequence.
      MachineBasicBlock::iterator I2 = IP;
      while (I2 != BB->begin() && (--I2)->isTerminator())
        IP = I2;

      // Emit: MTVRSAVE InVRSave
      BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
    }
  }
}

/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *PPCSubTarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    MachineBasicBlock::iterator MBBI = FirstMBB.begin();
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      if (PPCSubTarget->isTargetELF()) {
        GlobalBaseReg = PPC::R30;
        if (!PPCSubTarget->isSecurePlt() &&
            M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
              .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        GlobalBaseReg =
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // We must ensure that this sequence is dominated by the prologue.
      // FIXME: This is a bit of a big hammer since we don't get the benefits
      // of shrink-wrapping whenever we emit this instruction. Considering
      // this is used in any function where we emit a jump table, this may be
      // a significant limitation. We should consider inserting this in the
      // block where it is used and then commoning this sequence up if it
      // appears in multiple places.
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
      // MovePCtoLR8.
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}

/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
/// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
/// operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
    Imm = cast<ConstantSDNode>(N)->getZExtValue();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the value is a 32-bit constant
// operand. If so Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}

static unsigned getBranchHint(unsigned PCC,
                              const FunctionLoweringInfo &FuncInfo,
                              const SDValue &DestMBB) {
  assert(isa<BasicBlockSDNode>(DestMBB));

  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
  const Instruction *BBTerm = BB->getTerminator();

  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

  const BasicBlock *TBB = BBTerm->getSuccessor(0);
  const BasicBlock *FBB = BBTerm->getSuccessor(1);

  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

  // We only want to handle cases which are easy to predict statically, e.g. a
  // C++ throw statement that is very likely not taken, or a call to a function
  // that never returns, e.g. stdlib exit(). So we set Threshold to filter out
  // the unwanted cases.
  //
  // Below is the LLVM branch weight table; we only want to handle cases 1 and 2.
  //
  // Case                  Taken:Nontaken  Example
  // 1. Unreachable        1048575:1       C++ throw, stdlib exit(),
  // 2. Invoke-terminating 1:1048575
  // 3. Coldblock          4:64            __builtin_expect
  // 4. Loop Branch        124:4           For loop
  // 5. PH/ZH/FPH          20:12
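  //
  // For illustration (derived from the weights in the table above): the
  // "unreachable" weights 1048575:1 give edge probabilities of roughly
  // 0.999999 and 0.000001, so max/Threshold (~1e-4) is not smaller than min
  // and a hint is emitted; the __builtin_expect weights 4:64 give roughly
  // 0.941 and 0.059, so max/Threshold (~9.4e-5) is smaller than min and no
  // hint is emitted.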
  const uint32_t Threshold = 10000;

  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    return PPC::BR_NO_HINT;

  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
                    << "::" << BB->getName() << "'\n"
                    << " -> " << TBB->getName() << ": " << TProb << "\n"
                    << " -> " << FBB->getName() << ": " << FProb << "\n");

  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
  // because we want 'TProb' to stand for 'branch probability' to Dest BasicBlock.
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    std::swap(TProb, FProb);

  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32 bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
  return N->getOpcode() == Opc
         && isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
  SDLoc dl(SN);
  int FI = cast<FrameIndexSDNode>(N)->getIndex();
  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
  if (SN->hasOneUse())
    CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
                         getSmallIPtrImm(Offset, dl));
  else
    ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
                                           getSmallIPtrImm(Offset, dl)));
}

bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
                                      bool isShiftMask, unsigned &SH,
                                      unsigned &MB, unsigned &ME) {
  // Don't even go down this path for i64, since different logic will be
  // necessary for rldicl/rldicr/rldimi.
  if (N->getValueType(0) != MVT::i32)
    return false;

  unsigned Shift = 32;
  unsigned Indeterminant = ~0;  // bit mask marking indeterminant results
  unsigned Opcode = N->getOpcode();
  if (N->getNumOperands() != 2 ||
      !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
    return false;

  if (Opcode == ISD::SHL) {
    // apply shift left to mask if it comes first
    if (isShiftMask) Mask = Mask << Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu << Shift);
  } else if (Opcode == ISD::SRL) {
    // apply shift right to mask if it comes first
    if (isShiftMask) Mask = Mask >> Shift;
    // determine which bits are made indeterminant by shift
    Indeterminant = ~(0xFFFFFFFFu >> Shift);
    // adjust for the left rotate
    Shift = 32 - Shift;
  } else if (Opcode == ISD::ROTL) {
    Indeterminant = 0;
  } else {
    return false;
  }

  // if the mask doesn't intersect any Indeterminant bits
  if (Mask && !(Mask & Indeterminant)) {
    SH = Shift & 31;
    // make sure the mask is still a mask (wrap arounds may not be)
    return isRunOfOnes(Mask, MB, ME);
  }
  return false;
}

bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue Base = ST->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = ST->getOffset();
  if (!Offset.isUndef())
    return false;

  SDLoc dl(ST);
  EVT MemVT = ST->getMemoryVT();
  EVT RegVT = ST->getValue().getValueType();

  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::STDXTLS;
    break;
  }
  }
  SDValue Chain = ST->getChain();
  SDVTList VTs = ST->getVTList();
  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
                   Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(ST, MN);
  ReplaceNode(ST, MN);
  return true;
}

bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::LDXTLS;
    break;
  }
  }
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}

/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc dl(N);

  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
  KnownBits RKnown = CurDAG->computeKnownBits(Op1);

  unsigned TargetMask = LKnown.Zero.getZExtValue();
  unsigned InsertMask = RKnown.Zero.getZExtValue();

  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to the
    // RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    if (isRunOfOnes(InsertMask, MB, ME)) {
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      if (Op1Opc == ISD::AND) {
        // The AND mask might not be a constant, and we need to make sure that
        // if we're going to fold the masking with the insert, all bits not
        // known to be zero in the mask are known to be one.
        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
            isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
          // Note that Value must be in range here (less than 32) because
          // otherwise there would not be any bits set in InsertMask.
          Op1 = Op1.getOperand(0).getOperand(0);
          SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        }
      }

      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                        getI32Imm(ME, dl) };
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
      return true;
    }
  }
  return false;
}

// Predict the number of instructions that would be generated by calling
// selectI64Imm(N).
static unsigned selectI64ImmInstrCountDirect(int64_t Imm) {
  // Assume no remaining bits.
  unsigned Remainder = 0;
  // Assume no shift required.
  unsigned Shift = 0;

  // If it can't be represented as a 32 bit value.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    // If the shifted value fits 32 bits.
    if (isInt<32>(ImmSh)) {
      // Go with the shifted value.
      Imm = ImmSh;
    } else {
      // Still stuck with a 64 bit value.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Intermediate operand.
  unsigned Result = 0;

  // Handle first 32 bits.
  unsigned Lo = Imm & 0xFFFF;

  // Simple value.
  if (isInt<16>(Imm)) {
    // Just the Lo bits.
    ++Result;
  } else if (Lo) {
    // Handle the Hi bits and Lo bits.
    Result += 2;
  } else {
    // Just the Hi bits.
    ++Result;
  }

  // If no shift, we're done.
  if (!Shift) return Result;

  // If Hi word == Lo word,
  // we can use rldimi to insert the Lo word into Hi word.
  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
    ++Result;
    return Result;
  }

  // Shift for next step if the upper 32-bits were not zero.
  if (Imm)
    ++Result;

  // Add in the last bits as required.
  if ((Remainder >> 16) & 0xFFFF)
    ++Result;
  if (Remainder & 0xFFFF)
    ++Result;

  return Result;
}

// Rotate a 64-bit value left by R bits (R is expected to be in [1, 63]).
static uint64_t Rot64(uint64_t Imm, unsigned R) {
  return (Imm << R) | (Imm >> (64 - R));
}

static unsigned selectI64ImmInstrCount(int64_t Imm) {
  unsigned Count = selectI64ImmInstrCountDirect(Imm);

  // If the instruction count is 1 or 2, we do not need further analysis
  // since rotate + load constant requires at least 2 instructions.
  if (Count <= 2)
    return Count;

  for (unsigned r = 1; r < 63; ++r) {
    uint64_t RImm = Rot64(Imm, r);
    unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
    Count = std::min(Count, RCount);

    // See comments in selectI64Imm for an explanation of the logic below.
    unsigned LS = findLastSet(RImm);
    if (LS != r-1)
      continue;

    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
    uint64_t RImmWithOnes = RImm | OnesMask;

    RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
    Count = std::min(Count, RCount);
  }

  return Count;
}

// Select a 64-bit constant. For cost-modeling purposes, selectI64ImmInstrCount
// (above) needs to be kept in sync with this function.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  int64_t Imm) {
  // Assume no remaining bits.
  unsigned Remainder = 0;
  // Assume no shift required.
  unsigned Shift = 0;

  // If it can't be represented as a 32 bit value.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    // If the shifted value fits 32 bits.
    if (isInt<32>(ImmSh)) {
      // Go with the shifted value.
      Imm = ImmSh;
    } else {
      // Still stuck with a 64 bit value.
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }

  // Intermediate operand.
  SDNode *Result;

  // Handle first 32 bits.
  unsigned Lo = Imm & 0xFFFF;
  unsigned Hi = (Imm >> 16) & 0xFFFF;

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Simple value.
  if (isInt<16>(Imm)) {
    uint64_t SextImm = SignExtend64(Lo, 16);
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
    // Just the Lo bits.
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  } else if (Lo) {
    // Handle the Hi bits.
    unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
    // And Lo bits.
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Lo));
  } else {
    // Just the Hi bits.
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
  }

  // If no shift, we're done.
  if (!Shift) return Result;

  // If Hi word == Lo word,
  // we can use rldimi to insert the Lo word into Hi word.
  if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
    SDValue Ops[] =
      { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }

  // Shift for next step if the upper 32-bits were not zero.
  if (Imm) {
    Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
                                    SDValue(Result, 0),
                                    getI32Imm(Shift),
                                    getI32Imm(63 - Shift));
  }

  // Add in the last bits as required.
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Hi));
  }
  if ((Lo = Remainder & 0xFFFF)) {
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Lo));
  }

  return Result;
}

static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl,
                            int64_t Imm) {
  unsigned Count = selectI64ImmInstrCountDirect(Imm);

  // If the instruction count is 1 or 2, we do not need further analysis
  // since rotate + load constant requires at least 2 instructions.
  if (Count <= 2)
    return selectI64ImmDirect(CurDAG, dl, Imm);

  unsigned RMin = 0;

  int64_t MatImm;
  unsigned MaskEnd;

  for (unsigned r = 1; r < 63; ++r) {
    uint64_t RImm = Rot64(Imm, r);
    unsigned RCount = selectI64ImmInstrCountDirect(RImm) + 1;
    if (RCount < Count) {
      Count = RCount;
      RMin = r;
      MatImm = RImm;
      MaskEnd = 63;
    }

    // If the immediate to generate has many trailing zeros, it might be
    // worthwhile to generate a rotated value with too many leading ones
    // (because that's free with li/lis's sign-extension semantics), and then
    // mask them off after rotation.

    unsigned LS = findLastSet(RImm);
    // We're adding (63-LS) higher-order ones, and we expect to mask them off
    // after performing the inverse rotation by (64-r). So we need that:
    //   63-LS == 64-r => LS == r-1
    if (LS != r-1)
      continue;

    uint64_t OnesMask = -(int64_t) (UINT64_C(1) << (LS+1));
    uint64_t RImmWithOnes = RImm | OnesMask;

    RCount = selectI64ImmInstrCountDirect(RImmWithOnes) + 1;
    if (RCount < Count) {
      Count = RCount;
      RMin = r;
      MatImm = RImmWithOnes;
      MaskEnd = LS;
    }
  }

  if (!RMin)
    return selectI64ImmDirect(CurDAG, dl, Imm);

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  SDValue Val = SDValue(selectI64ImmDirect(CurDAG, dl, MatImm), 0);
  return CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Val,
                                getI32Imm(64 - RMin), getI32Imm(MaskEnd));
}

static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
  unsigned MaxTruncation = 0;
  // Cannot use range-based for loop here as we need the actual use (i.e. we
  // need the operand number corresponding to the use). A range-based for
  // will unbox the use and provide an SDNode*.
  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
       Use != UseEnd; ++Use) {
    unsigned Opc =
      Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
    switch (Opc) {
    default: return 0;
    case ISD::TRUNCATE:
      if (Use->isMachineOpcode())
        return 0;
      MaxTruncation =
        std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
      continue;
    case ISD::STORE: {
      if (Use->isMachineOpcode())
        return 0;
      StoreSDNode *STN = cast<StoreSDNode>(*Use);
      unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
      if (MemVTSize == 64 || Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, MemVTSize);
      continue;
    }
    case PPC::STW8:
    case PPC::STWX8:
    case PPC::STWU8:
    case PPC::STWUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 32u);
      continue;
    case PPC::STH8:
    case PPC::STHX8:
    case PPC::STHU8:
    case PPC::STHUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 16u);
      continue;
    case PPC::STB8:
    case PPC::STBX8:
    case PPC::STBU8:
    case PPC::STBUX8:
      if (Use.getOperandNo() != 0)
        return 0;
      MaxTruncation = std::max(MaxTruncation, 8u);
      continue;
    }
  }
  return MaxTruncation;
}

// Select a 64-bit constant.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, SDNode *N) {
  SDLoc dl(N);

  // Get 64 bit value.
  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
    uint64_t SextImm = SignExtend64(Imm, MinSize);
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
    if (isInt<16>(SextImm))
      return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  return selectI64Imm(CurDAG, dl, Imm);
}

namespace {

class BitPermutationSelector {
  struct ValueBit {
    SDValue V;

    // The bit number in the value, using a convention where bit 0 is the
    // lowest-order bit.
    unsigned Idx;

    // ConstZero means a bit we need to mask off.
    // Variable is a bit that comes from an input variable.
    // VariableKnownToBeZero is also a bit that comes from an input variable,
    // but it is known to be already zero. So we do not need to mask them.
    enum Kind {
      ConstZero,
      Variable,
      VariableKnownToBeZero
    } K;

    ValueBit(SDValue V, unsigned I, Kind K = Variable)
      : V(V), Idx(I), K(K) {}
    ValueBit(Kind K = Variable)
      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}

    bool isZero() const {
      return K == ConstZero || K == VariableKnownToBeZero;
    }

    bool hasValue() const {
      return K == Variable || K == VariableKnownToBeZero;
    }

    SDValue getValue() const {
      assert(hasValue() && "Cannot get the value of a constant bit");
      return V;
    }

    unsigned getValueBitIndex() const {
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
      return Idx;
    }
  };

  // A bit group has the same underlying value and the same rotate factor.
  struct BitGroup {
    SDValue V;
    unsigned RLAmt;
    unsigned StartIdx, EndIdx;

    // This rotation amount assumes that the lower 32 bits of the quantity are
    // replicated in the high 32 bits by the rotation operator (which is done
    // by rlwinm and friends in 64-bit mode).
    bool Repl32;
    // Did converting to Repl32 == true change the rotation factor? If it did,
    // it decreased it by 32.
    bool Repl32CR;
    // Was this group coalesced after setting Repl32 to true?
    bool Repl32Coalesced;

    BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
      : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
        Repl32Coalesced(false) {
      LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
                        << " [" << S << ", " << E << "]\n");
    }
  };

  // Information on each (Value, RLAmt) pair (like the number of groups
  // associated with each) used to choose the lowering method.
  struct ValueRotInfo {
    SDValue V;
    unsigned RLAmt = std::numeric_limits<unsigned>::max();
    unsigned NumGroups = 0;
    unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
    bool Repl32 = false;

    ValueRotInfo() = default;

    // For sorting (in reverse order) by NumGroups, and then by
    // FirstGroupStartIdx.
    bool operator < (const ValueRotInfo &Other) const {
      // We need to sort so that the non-Repl32 come first because, when we're
      // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
      // masking operation.
      if (Repl32 < Other.Repl32)
        return true;
      else if (Repl32 > Other.Repl32)
        return false;
      else if (NumGroups > Other.NumGroups)
        return true;
      else if (NumGroups < Other.NumGroups)
        return false;
      else if (RLAmt == 0 && Other.RLAmt != 0)
        return true;
      else if (RLAmt != 0 && Other.RLAmt == 0)
        return false;
      else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
        return true;
      return false;
    }
  };

  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
  using ValueBitsMemoizer =
      DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
  ValueBitsMemoizer Memoizer;

  // Return a pair of bool and a SmallVector pointer to a memoization entry.
  // The bool is true if something interesting was deduced; otherwise we're
  // providing only a generic representation of V (or something else likewise
  // uninteresting for instruction selection) through the SmallVector.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    ValueEntry.reset(new ValueBitsMemoizedValue());
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);

        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else {
            // AND instruction masks this bit. If the input is already zero,
            // we have nothing to do here. Otherwise, make the bit ConstZero.
            if ((*LHSBits)[i].isZero())
              Bits[i] = (*LHSBits)[i];
            else
              Bits[i] = ValueBit(ValueBit::ConstZero);
          }

        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      bool AllDisjoint = true;
      SDValue LastVal = SDValue();
      unsigned LastIdx = 0;
      for (unsigned i = 0; i < NumBits; ++i) {
        if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
          // If both inputs are known to be zero and one is ConstZero and
          // another is VariableKnownToBeZero, we can select whichever
          // we like. To minimize the number of bit groups, we select
          // VariableKnownToBeZero if this bit is the next bit of the same
          // input variable from the previous bit. Otherwise, we select
          // ConstZero.
          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
              LHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = LHSBits[i];
          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
                   RHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = RHSBits[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);
        }
        else if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          AllDisjoint = false;
          break;
        }
        // We remember the value and bit index of this bit.
        if (Bits[i].hasValue()) {
          LastVal = Bits[i].getValue();
          LastIdx = Bits[i].getValueBitIndex();
        }
        else {
          if (LastVal) LastVal = SDValue();
          LastIdx = 0;
        }
      }

      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::TRUNCATE: {
      EVT FromType = V.getOperand(0).getValueType();
      EVT ToType = V.getValueType();
      // We support only the case with truncate from i64 to i32.
      if (FromType != MVT::i64 || ToType != MVT::i32)
        break;
      const unsigned NumAllBits = FromType.getSizeInBits();
      SmallVector<ValueBit, 64> *InBits;
      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
                                                   NumAllBits);
      const unsigned NumValidBits = ToType.getSizeInBits();

      // A 32-bit instruction cannot touch the upper 32-bit part of a 64-bit
      // value. So, we cannot include this truncate.
      bool UseUpper32bit = false;
      for (unsigned i = 0; i < NumValidBits; ++i)
        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
          UseUpper32bit = true;
          break;
        }
      if (UseUpper32bit)
        break;

      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*InBits)[i];

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::AssertZext: {
      // For AssertZext, we look through the operand and
      // mark the bits known to be zero.
      const SmallVector<ValueBit, 64> *LHSBits;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumBits);

      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
      const unsigned NumValidBits = FromType.getSizeInBits();
      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // These bits are known to be zero but the AssertZext may be from a value
      // that already has some constant zero bits (i.e. from a masking and).
      for (unsigned i = NumValidBits; i < NumBits; ++i)
        Bits[i] = (*LHSBits)[i].hasValue()
                    ? ValueBit((*LHSBits)[i].getValue(),
                               (*LHSBits)[i].getValueBitIndex(),
                               ValueBit::VariableKnownToBeZero)
                    : ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::LOAD:
      LoadSDNode *LD = cast<LoadSDNode>(V);
      if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
        EVT VT = LD->getMemoryVT();
        const unsigned NumValidBits = VT.getSizeInBits();

        for (unsigned i = 0; i < NumValidBits; ++i)
          Bits[i] = ValueBit(V, i);

        // These bits are known to be zero.
        for (unsigned i = NumValidBits; i < NumBits; ++i)
          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);

        // Zero-extending load itself cannot be optimized. So, it is not
        // interesting by itself though it gives useful information.
        return std::make_pair(Interesting = false, &Bits);
      }
      break;
    }

    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }

  // For each value (except the constant ones), compute the left-rotate amount
  // to get it from its original to final position.
  void computeRotationAmounts() {
    NeedMask = false;
    RLAmt.resize(Bits.size());
    for (unsigned i = 0; i < Bits.size(); ++i)
      if (Bits[i].hasValue()) {
        unsigned VBI = Bits[i].getValueBitIndex();
        if (i >= VBI)
          RLAmt[i] = i - VBI;
        else
          RLAmt[i] = Bits.size() - (VBI - i);
      } else if (Bits[i].isZero()) {
        NeedMask = true;
        RLAmt[i] = UINT32_MAX;
      } else {
        llvm_unreachable("Unknown value bit type");
      }
  }

  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit is known to be zero and the current group is a bit group
      // of zeros, we do not need to terminate the current bit group even if
      // the Value or RLAmt does not match here. Instead, we terminate this
      // group when the first non-zero bit appears later.
      if (IsGroupOfZeros && Bits[i].isZero())
        continue;

      // If this bit has the same underlying value and the same rotate factor as
      // the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        // We cannot continue the current group if this bit is not known to
        // be zero in a bit group of zeros.
        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
          continue;

      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    }
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }

  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
  // associated with each. If the numbers of groups are the same, we prefer a
  // group which does not require a rotate, i.e. whose RLAmt is 0, to avoid
  // the first rotate instruction. If there is a degeneracy, pick the one that
  // occurs first (in the final value).
  void collectValueRotInfo() {
    ValueRots.clear();

    for (auto &BG : BitGroups) {
      unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
      ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
      VRI.V = BG.V;
      VRI.RLAmt = BG.RLAmt;
      VRI.Repl32 = BG.Repl32;
      VRI.NumGroups += 1;
      VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
    }

    // Now that we've collected the various ValueRotInfo instances, we need to
    // sort them.
    ValueRotsVec.clear();
    for (auto &I : ValueRots) {
      ValueRotsVec.push_back(I.second);
    }
    llvm::sort(ValueRotsVec);
  }

  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32 bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    //   Indices:   15 14 13 12 11 10 9 8  7  6  5  4  3  2  1  0
    //   V bits: ...  7  6  5  4  3  2 1 0 31 30 29 28 27 26 25 24
    //   Groups: |      RLAmt = 8      |      RLAmt = 40          |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.
1583 1584 auto IsAllLow32 = [this](BitGroup & BG) { 1585 if (BG.StartIdx <= BG.EndIdx) { 1586 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) { 1587 if (!Bits[i].hasValue()) 1588 continue; 1589 if (Bits[i].getValueBitIndex() >= 32) 1590 return false; 1591 } 1592 } else { 1593 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) { 1594 if (!Bits[i].hasValue()) 1595 continue; 1596 if (Bits[i].getValueBitIndex() >= 32) 1597 return false; 1598 } 1599 for (unsigned i = 0; i <= BG.EndIdx; ++i) { 1600 if (!Bits[i].hasValue()) 1601 continue; 1602 if (Bits[i].getValueBitIndex() >= 32) 1603 return false; 1604 } 1605 } 1606 1607 return true; 1608 }; 1609 1610 for (auto &BG : BitGroups) { 1611 // If this bit group has RLAmt of 0 and will not be merged with 1612 // another bit group, we don't benefit from Repl32. We don't mark 1613 // such group to give more freedom for later instruction selection. 1614 if (BG.RLAmt == 0) { 1615 auto PotentiallyMerged = [this](BitGroup & BG) { 1616 for (auto &BG2 : BitGroups) 1617 if (&BG != &BG2 && BG.V == BG2.V && 1618 (BG2.RLAmt == 0 || BG2.RLAmt == 32)) 1619 return true; 1620 return false; 1621 }; 1622 if (!PotentiallyMerged(BG)) 1623 continue; 1624 } 1625 if (BG.StartIdx < 32 && BG.EndIdx < 32) { 1626 if (IsAllLow32(BG)) { 1627 if (BG.RLAmt >= 32) { 1628 BG.RLAmt -= 32; 1629 BG.Repl32CR = true; 1630 } 1631 1632 BG.Repl32 = true; 1633 1634 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for " 1635 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " [" 1636 << BG.StartIdx << ", " << BG.EndIdx << "]\n"); 1637 } 1638 } 1639 } 1640 1641 // Now walk through the bit groups, consolidating where possible. 1642 for (auto I = BitGroups.begin(); I != BitGroups.end();) { 1643 // We might want to remove this bit group by merging it with the previous 1644 // group (which might be the ending group). 1645 auto IP = (I == BitGroups.begin()) ? 1646 std::prev(BitGroups.end()) : std::prev(I); 1647 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt && 1648 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) { 1649 1650 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for " 1651 << I->V.getNode() << " RLAmt = " << I->RLAmt << " [" 1652 << I->StartIdx << ", " << I->EndIdx 1653 << "] with group with range [" << IP->StartIdx << ", " 1654 << IP->EndIdx << "]\n"); 1655 1656 IP->EndIdx = I->EndIdx; 1657 IP->Repl32CR = IP->Repl32CR || I->Repl32CR; 1658 IP->Repl32Coalesced = true; 1659 I = BitGroups.erase(I); 1660 continue; 1661 } else { 1662 // There is a special case worth handling: If there is a single group 1663 // covering the entire upper 32 bits, and it can be merged with both 1664 // the next and previous groups (which might be the same group), then 1665 // do so. If it is the same group (so there will be only one group in 1666 // total), then we need to reverse the order of the range so that it 1667 // covers the entire 64 bits. 
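        // Concretely: if the only other group is a Repl32 group covering
        // [0, 31] with the same value and a rotation that matches modulo 32,
        // the two are fused into a single Repl32 group recorded as the
        // wrapped range [31, 30], which spans all 64 bits while keeping the
        // mask indices within the low word.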
1668 if (I->StartIdx == 32 && I->EndIdx == 63) { 1669 assert(std::next(I) == BitGroups.end() && 1670 "bit group ends at index 63 but there is another?"); 1671 auto IN = BitGroups.begin(); 1672 1673 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V && 1674 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt && 1675 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP && 1676 IsAllLow32(*I)) { 1677 1678 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode() 1679 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx 1680 << ", " << I->EndIdx 1681 << "] with 32-bit replicated groups with ranges [" 1682 << IP->StartIdx << ", " << IP->EndIdx << "] and [" 1683 << IN->StartIdx << ", " << IN->EndIdx << "]\n"); 1684 1685 if (IP == IN) { 1686 // There is only one other group; change it to cover the whole 1687 // range (backward, so that it can still be Repl32 but cover the 1688 // whole 64-bit range). 1689 IP->StartIdx = 31; 1690 IP->EndIdx = 30; 1691 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32; 1692 IP->Repl32Coalesced = true; 1693 I = BitGroups.erase(I); 1694 } else { 1695 // There are two separate groups, one before this group and one 1696 // after us (at the beginning). We're going to remove this group, 1697 // but also the group at the very beginning. 1698 IP->EndIdx = IN->EndIdx; 1699 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32; 1700 IP->Repl32Coalesced = true; 1701 I = BitGroups.erase(I); 1702 BitGroups.erase(BitGroups.begin()); 1703 } 1704 1705 // This must be the last group in the vector (and we might have 1706 // just invalidated the iterator above), so break here. 1707 break; 1708 } 1709 } 1710 } 1711 1712 ++I; 1713 } 1714 } 1715 1716 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 1717 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 1718 } 1719 1720 uint64_t getZerosMask() { 1721 uint64_t Mask = 0; 1722 for (unsigned i = 0; i < Bits.size(); ++i) { 1723 if (Bits[i].hasValue()) 1724 continue; 1725 Mask |= (UINT64_C(1) << i); 1726 } 1727 1728 return ~Mask; 1729 } 1730 1731 // This method extends an input value to 64 bit if input is 32-bit integer. 1732 // While selecting instructions in BitPermutationSelector in 64-bit mode, 1733 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included. 1734 // In such case, we extend it to 64 bit to be consistent with other values. 1735 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) { 1736 if (V.getValueSizeInBits() == 64) 1737 return V; 1738 1739 assert(V.getValueSizeInBits() == 32); 1740 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 1741 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, 1742 MVT::i64), 0); 1743 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, 1744 MVT::i64, ImDef, V, 1745 SubRegIdx), 0); 1746 return ExtVal; 1747 } 1748 1749 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) { 1750 if (V.getValueSizeInBits() == 32) 1751 return V; 1752 1753 assert(V.getValueSizeInBits() == 64); 1754 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 1755 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, 1756 MVT::i32, V, SubRegIdx), 0); 1757 return SubVal; 1758 } 1759 1760 // Depending on the number of groups for a particular value, it might be 1761 // better to rotate, mask explicitly (using andi/andis), and then or the 1762 // result. Select this part of the result first. 
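  //
  // For instance, a value that appears (after a single rotation) in five
  // separate bit groups spread across both halves of an i32 result would need
  // five rotate-and-mask or rotate-and-insert instructions, but can instead be
  // formed with one rlwinm for the rotation, ANDI_rec and ANDIS_rec for the
  // two halves of the mask, and an OR to join them (four instructions in
  // total), which is the kind of trade-off the instruction-count comparison
  // below is making.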
1763 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) { 1764 if (BPermRewriterNoMasking) 1765 return; 1766 1767 for (ValueRotInfo &VRI : ValueRotsVec) { 1768 unsigned Mask = 0; 1769 for (unsigned i = 0; i < Bits.size(); ++i) { 1770 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V) 1771 continue; 1772 if (RLAmt[i] != VRI.RLAmt) 1773 continue; 1774 Mask |= (1u << i); 1775 } 1776 1777 // Compute the masks for andi/andis that would be necessary. 1778 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1779 assert((ANDIMask != 0 || ANDISMask != 0) && 1780 "No set bits in mask for value bit groups"); 1781 bool NeedsRotate = VRI.RLAmt != 0; 1782 1783 // We're trying to minimize the number of instructions. If we have one 1784 // group, using one of andi/andis can break even. If we have three 1785 // groups, we can use both andi and andis and break even (to use both 1786 // andi and andis we also need to or the results together). We need four 1787 // groups if we also need to rotate. To use andi/andis we need to do more 1788 // than break even because rotate-and-mask instructions tend to be easier 1789 // to schedule. 1790 1791 // FIXME: We've biased here against using andi/andis, which is right for 1792 // POWER cores, but not optimal everywhere. For example, on the A2, 1793 // andi/andis have single-cycle latency whereas the rotate-and-mask 1794 // instructions take two cycles, and it would be better to bias toward 1795 // andi/andis in break-even cases. 1796 1797 unsigned NumAndInsts = (unsigned) NeedsRotate + 1798 (unsigned) (ANDIMask != 0) + 1799 (unsigned) (ANDISMask != 0) + 1800 (unsigned) (ANDIMask != 0 && ANDISMask != 0) + 1801 (unsigned) (bool) Res; 1802 1803 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() 1804 << " RL: " << VRI.RLAmt << ":" 1805 << "\n\t\t\tisel using masking: " << NumAndInsts 1806 << " using rotates: " << VRI.NumGroups << "\n"); 1807 1808 if (NumAndInsts >= VRI.NumGroups) 1809 continue; 1810 1811 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 1812 1813 if (InstCnt) *InstCnt += NumAndInsts; 1814 1815 SDValue VRot; 1816 if (VRI.RLAmt) { 1817 SDValue Ops[] = 1818 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), 1819 getI32Imm(0, dl), getI32Imm(31, dl) }; 1820 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 1821 Ops), 0); 1822 } else { 1823 VRot = TruncateToInt32(VRI.V, dl); 1824 } 1825 1826 SDValue ANDIVal, ANDISVal; 1827 if (ANDIMask != 0) 1828 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, 1829 VRot, getI32Imm(ANDIMask, dl)), 1830 0); 1831 if (ANDISMask != 0) 1832 ANDISVal = 1833 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot, 1834 getI32Imm(ANDISMask, dl)), 1835 0); 1836 1837 SDValue TotalVal; 1838 if (!ANDIVal) 1839 TotalVal = ANDISVal; 1840 else if (!ANDISVal) 1841 TotalVal = ANDIVal; 1842 else 1843 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1844 ANDIVal, ANDISVal), 0); 1845 1846 if (!Res) 1847 Res = TotalVal; 1848 else 1849 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1850 Res, TotalVal), 0); 1851 1852 // Now, remove all groups with this underlying value and rotation 1853 // factor. 1854 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1855 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1856 }); 1857 } 1858 } 1859 1860 // Instruction selection for the 32-bit case. 
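  //
  // A familiar case that lands here is a 32-bit byte swap: its four byte
  // groups use only two rotation amounts (8 and 24), so the permutation
  // typically selects to a single rlwinm (a full rotate) followed by two
  // rlwimi instructions rather than one operation per byte.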
1861 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) { 1862 SDLoc dl(N); 1863 SDValue Res; 1864 1865 if (InstCnt) *InstCnt = 0; 1866 1867 // Take care of cases that should use andi/andis first. 1868 SelectAndParts32(dl, Res, InstCnt); 1869 1870 // If we've not yet selected a 'starting' instruction, and we have no zeros 1871 // to fill in, select the (Value, RLAmt) with the highest priority (largest 1872 // number of groups), and start with this rotated value. 1873 if ((!NeedMask || LateMask) && !Res) { 1874 ValueRotInfo &VRI = ValueRotsVec[0]; 1875 if (VRI.RLAmt) { 1876 if (InstCnt) *InstCnt += 1; 1877 SDValue Ops[] = 1878 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl), 1879 getI32Imm(0, dl), getI32Imm(31, dl) }; 1880 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 1881 0); 1882 } else { 1883 Res = TruncateToInt32(VRI.V, dl); 1884 } 1885 1886 // Now, remove all groups with this underlying value and rotation factor. 1887 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 1888 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt; 1889 }); 1890 } 1891 1892 if (InstCnt) *InstCnt += BitGroups.size(); 1893 1894 // Insert the other groups (one at a time). 1895 for (auto &BG : BitGroups) { 1896 if (!Res) { 1897 SDValue Ops[] = 1898 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), 1899 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1900 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1901 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 1902 } else { 1903 SDValue Ops[] = 1904 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl), 1905 getI32Imm(Bits.size() - BG.EndIdx - 1, dl), 1906 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) }; 1907 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0); 1908 } 1909 } 1910 1911 if (LateMask) { 1912 unsigned Mask = (unsigned) getZerosMask(); 1913 1914 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16; 1915 assert((ANDIMask != 0 || ANDISMask != 0) && 1916 "No set bits in zeros mask?"); 1917 1918 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 1919 (unsigned) (ANDISMask != 0) + 1920 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 1921 1922 SDValue ANDIVal, ANDISVal; 1923 if (ANDIMask != 0) 1924 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32, 1925 Res, getI32Imm(ANDIMask, dl)), 1926 0); 1927 if (ANDISMask != 0) 1928 ANDISVal = 1929 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res, 1930 getI32Imm(ANDISMask, dl)), 1931 0); 1932 1933 if (!ANDIVal) 1934 Res = ANDISVal; 1935 else if (!ANDISVal) 1936 Res = ANDIVal; 1937 else 1938 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32, 1939 ANDIVal, ANDISVal), 0); 1940 } 1941 1942 return Res.getNode(); 1943 } 1944 1945 unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32, 1946 unsigned MaskStart, unsigned MaskEnd, 1947 bool IsIns) { 1948 // In the notation used by the instructions, 'start' and 'end' are reversed 1949 // because bits are counted from high to low order. 1950 unsigned InstMaskStart = 64 - MaskEnd - 1, 1951 InstMaskEnd = 64 - MaskStart - 1; 1952 1953 if (Repl32) 1954 return 1; 1955 1956 if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) || 1957 InstMaskEnd == 63 - RLAmt) 1958 return 1; 1959 1960 return 2; 1961 } 1962 1963 // For 64-bit values, not all combinations of rotates and masks are 1964 // available. Produce one if it is available. 
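  // Roughly: rldicl handles masks with InstMaskEnd == 63 (running through the
  // least-significant bit), rldicr handles masks with InstMaskStart == 0
  // (running through the most-significant bit), rldic handles the case where
  // the mask ends exactly where the rotated-in zeros begin
  // (InstMaskEnd == 63 - RLAmt), and rlwinm8 handles Repl32 groups whose mask
  // lies entirely in the low word; anything else is synthesized from two
  // instructions below.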
1965 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt, 1966 bool Repl32, unsigned MaskStart, unsigned MaskEnd, 1967 unsigned *InstCnt = nullptr) { 1968 // In the notation used by the instructions, 'start' and 'end' are reversed 1969 // because bits are counted from high to low order. 1970 unsigned InstMaskStart = 64 - MaskEnd - 1, 1971 InstMaskEnd = 64 - MaskStart - 1; 1972 1973 if (InstCnt) *InstCnt += 1; 1974 1975 if (Repl32) { 1976 // This rotation amount assumes that the lower 32 bits of the quantity 1977 // are replicated in the high 32 bits by the rotation operator (which is 1978 // done by rlwinm and friends). 1979 assert(InstMaskStart >= 32 && "Mask cannot start out of range"); 1980 assert(InstMaskEnd >= 32 && "Mask cannot end out of range"); 1981 SDValue Ops[] = 1982 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 1983 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) }; 1984 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64, 1985 Ops), 0); 1986 } 1987 1988 if (InstMaskEnd == 63) { 1989 SDValue Ops[] = 1990 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 1991 getI32Imm(InstMaskStart, dl) }; 1992 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0); 1993 } 1994 1995 if (InstMaskStart == 0) { 1996 SDValue Ops[] = 1997 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 1998 getI32Imm(InstMaskEnd, dl) }; 1999 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0); 2000 } 2001 2002 if (InstMaskEnd == 63 - RLAmt) { 2003 SDValue Ops[] = 2004 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl), 2005 getI32Imm(InstMaskStart, dl) }; 2006 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0); 2007 } 2008 2009 // We cannot do this with a single instruction, so we'll use two. The 2010 // problem is that we're not free to choose both a rotation amount and mask 2011 // start and end independently. We can choose an arbitrary mask start and 2012 // end, but then the rotation amount is fixed. Rotation, however, can be 2013 // inverted, and so by applying an "inverse" rotation first, we can get the 2014 // desired result. 2015 if (InstCnt) *InstCnt += 1; 2016 2017 // The rotation mask for the second instruction must be MaskStart. 2018 unsigned RLAmt2 = MaskStart; 2019 // The first instruction must rotate V so that the overall rotation amount 2020 // is RLAmt. 2021 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64; 2022 if (RLAmt1) 2023 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63); 2024 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd); 2025 } 2026 2027 // For 64-bit values, not all combinations of rotates and masks are 2028 // available. Produce a rotate-mask-and-insert if one is available. 2029 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl, 2030 unsigned RLAmt, bool Repl32, unsigned MaskStart, 2031 unsigned MaskEnd, unsigned *InstCnt = nullptr) { 2032 // In the notation used by the instructions, 'start' and 'end' are reversed 2033 // because bits are counted from high to low order. 2034 unsigned InstMaskStart = 64 - MaskEnd - 1, 2035 InstMaskEnd = 64 - MaskStart - 1; 2036 2037 if (InstCnt) *InstCnt += 1; 2038 2039 if (Repl32) { 2040 // This rotation amount assumes that the lower 32 bits of the quantity 2041 // are replicated in the high 32 bits by the rotation operator (which is 2042 // done by rlwinm and friends). 
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }

  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
    if (BPermRewriterNoMasking)
      return;

    // The idea here is the same as in the 32-bit version, but with additional
    // complications from the fact that Repl32 might be true. Because we
    // aggressively convert bit groups to Repl32 form (which, for small
    // rotation factors, involves no other change), and then coalesce, it might
    // be the case that a single 64-bit masking operation could handle both
    // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
    // form allowed coalescing, then we must use a 32-bit rotation in order to
    // completely capture the new combined bit group.

    for (ValueRotInfo &VRI : ValueRotsVec) {
      uint64_t Mask = 0;

      // We need to add to the mask all bits from the associated bit groups.
      // If Repl32 is false, we need to add bits from bit groups that have
      // Repl32 true, but are trivially convertible to Repl32 false. Such a
      // group is trivially convertible if it overlaps only with the lower 32
      // bits, and the group has not been coalesced.
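      // For instance, a Repl32 group carrying RLAmt = 8 with Repl32CR set
      // originally required a 64-bit rotation of 40, so when it is matched
      // against a non-Repl32 ValueRotInfo below, its effective rotation is
      // taken as RLAmt + 32.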
2098 auto MatchingBG = [VRI](const BitGroup &BG) { 2099 if (VRI.V != BG.V) 2100 return false; 2101 2102 unsigned EffRLAmt = BG.RLAmt; 2103 if (!VRI.Repl32 && BG.Repl32) { 2104 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx && 2105 !BG.Repl32Coalesced) { 2106 if (BG.Repl32CR) 2107 EffRLAmt += 32; 2108 } else { 2109 return false; 2110 } 2111 } else if (VRI.Repl32 != BG.Repl32) { 2112 return false; 2113 } 2114 2115 return VRI.RLAmt == EffRLAmt; 2116 }; 2117 2118 for (auto &BG : BitGroups) { 2119 if (!MatchingBG(BG)) 2120 continue; 2121 2122 if (BG.StartIdx <= BG.EndIdx) { 2123 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) 2124 Mask |= (UINT64_C(1) << i); 2125 } else { 2126 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) 2127 Mask |= (UINT64_C(1) << i); 2128 for (unsigned i = 0; i <= BG.EndIdx; ++i) 2129 Mask |= (UINT64_C(1) << i); 2130 } 2131 } 2132 2133 // We can use the 32-bit andi/andis technique if the mask does not 2134 // require any higher-order bits. This can save an instruction compared 2135 // to always using the general 64-bit technique. 2136 bool Use32BitInsts = isUInt<32>(Mask); 2137 // Compute the masks for andi/andis that would be necessary. 2138 unsigned ANDIMask = (Mask & UINT16_MAX), 2139 ANDISMask = (Mask >> 16) & UINT16_MAX; 2140 2141 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)); 2142 2143 unsigned NumAndInsts = (unsigned) NeedsRotate + 2144 (unsigned) (bool) Res; 2145 if (Use32BitInsts) 2146 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) + 2147 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 2148 else 2149 NumAndInsts += selectI64ImmInstrCount(Mask) + /* and */ 1; 2150 2151 unsigned NumRLInsts = 0; 2152 bool FirstBG = true; 2153 bool MoreBG = false; 2154 for (auto &BG : BitGroups) { 2155 if (!MatchingBG(BG)) { 2156 MoreBG = true; 2157 continue; 2158 } 2159 NumRLInsts += 2160 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx, 2161 !FirstBG); 2162 FirstBG = false; 2163 } 2164 2165 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode() 2166 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":") 2167 << "\n\t\t\tisel using masking: " << NumAndInsts 2168 << " using rotates: " << NumRLInsts << "\n"); 2169 2170 // When we'd use andi/andis, we bias toward using the rotates (andi only 2171 // has a record form, and is cracked on POWER cores). However, when using 2172 // general 64-bit constant formation, bias toward the constant form, 2173 // because that exposes more opportunities for CSE. 2174 if (NumAndInsts > NumRLInsts) 2175 continue; 2176 // When merging multiple bit groups, instruction or is used. 2177 // But when rotate is used, rldimi can inert the rotated value into any 2178 // register, so instruction or can be avoided. 2179 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts) 2180 continue; 2181 2182 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n"); 2183 2184 if (InstCnt) *InstCnt += NumAndInsts; 2185 2186 SDValue VRot; 2187 // We actually need to generate a rotation if we have a non-zero rotation 2188 // factor or, in the Repl32 case, if we care about any of the 2189 // higher-order replicated bits. In the latter case, we generate a mask 2190 // backward so that it actually includes the entire 64 bits. 2191 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask))) 2192 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 2193 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 
30 : 63); 2194 else 2195 VRot = VRI.V; 2196 2197 SDValue TotalVal; 2198 if (Use32BitInsts) { 2199 assert((ANDIMask != 0 || ANDISMask != 0) && 2200 "No set bits in mask when using 32-bit ands for 64-bit value"); 2201 2202 SDValue ANDIVal, ANDISVal; 2203 if (ANDIMask != 0) 2204 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, 2205 ExtendToInt64(VRot, dl), 2206 getI32Imm(ANDIMask, dl)), 2207 0); 2208 if (ANDISMask != 0) 2209 ANDISVal = 2210 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, 2211 ExtendToInt64(VRot, dl), 2212 getI32Imm(ANDISMask, dl)), 2213 0); 2214 2215 if (!ANDIVal) 2216 TotalVal = ANDISVal; 2217 else if (!ANDISVal) 2218 TotalVal = ANDIVal; 2219 else 2220 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2221 ExtendToInt64(ANDIVal, dl), ANDISVal), 0); 2222 } else { 2223 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); 2224 TotalVal = 2225 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 2226 ExtendToInt64(VRot, dl), TotalVal), 2227 0); 2228 } 2229 2230 if (!Res) 2231 Res = TotalVal; 2232 else 2233 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2234 ExtendToInt64(Res, dl), TotalVal), 2235 0); 2236 2237 // Now, remove all groups with this underlying value and rotation 2238 // factor. 2239 eraseMatchingBitGroups(MatchingBG); 2240 } 2241 } 2242 2243 // Instruction selection for the 64-bit case. 2244 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) { 2245 SDLoc dl(N); 2246 SDValue Res; 2247 2248 if (InstCnt) *InstCnt = 0; 2249 2250 // Take care of cases that should use andi/andis first. 2251 SelectAndParts64(dl, Res, InstCnt); 2252 2253 // If we've not yet selected a 'starting' instruction, and we have no zeros 2254 // to fill in, select the (Value, RLAmt) with the highest priority (largest 2255 // number of groups), and start with this rotated value. 2256 if ((!NeedMask || LateMask) && !Res) { 2257 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32 2258 // groups will come first, and so the VRI representing the largest number 2259 // of groups might not be first (it might be the first Repl32 groups). 2260 unsigned MaxGroupsIdx = 0; 2261 if (!ValueRotsVec[0].Repl32) { 2262 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i) 2263 if (ValueRotsVec[i].Repl32) { 2264 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups) 2265 MaxGroupsIdx = i; 2266 break; 2267 } 2268 } 2269 2270 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx]; 2271 bool NeedsRotate = false; 2272 if (VRI.RLAmt) { 2273 NeedsRotate = true; 2274 } else if (VRI.Repl32) { 2275 for (auto &BG : BitGroups) { 2276 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt || 2277 BG.Repl32 != VRI.Repl32) 2278 continue; 2279 2280 // We don't need a rotate if the bit group is confined to the lower 2281 // 32 bits. 2282 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx) 2283 continue; 2284 2285 NeedsRotate = true; 2286 break; 2287 } 2288 } 2289 2290 if (NeedsRotate) 2291 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32, 2292 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63, 2293 InstCnt); 2294 else 2295 Res = VRI.V; 2296 2297 // Now, remove all groups with this underlying value and rotation factor. 
2298 if (Res) 2299 eraseMatchingBitGroups([VRI](const BitGroup &BG) { 2300 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt && 2301 BG.Repl32 == VRI.Repl32; 2302 }); 2303 } 2304 2305 // Because 64-bit rotates are more flexible than inserts, we might have a 2306 // preference regarding which one we do first (to save one instruction). 2307 if (!Res) 2308 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) { 2309 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 2310 false) < 2311 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx, 2312 true)) { 2313 if (I != BitGroups.begin()) { 2314 BitGroup BG = *I; 2315 BitGroups.erase(I); 2316 BitGroups.insert(BitGroups.begin(), BG); 2317 } 2318 2319 break; 2320 } 2321 } 2322 2323 // Insert the other groups (one at a time). 2324 for (auto &BG : BitGroups) { 2325 if (!Res) 2326 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx, 2327 BG.EndIdx, InstCnt); 2328 else 2329 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32, 2330 BG.StartIdx, BG.EndIdx, InstCnt); 2331 } 2332 2333 if (LateMask) { 2334 uint64_t Mask = getZerosMask(); 2335 2336 // We can use the 32-bit andi/andis technique if the mask does not 2337 // require any higher-order bits. This can save an instruction compared 2338 // to always using the general 64-bit technique. 2339 bool Use32BitInsts = isUInt<32>(Mask); 2340 // Compute the masks for andi/andis that would be necessary. 2341 unsigned ANDIMask = (Mask & UINT16_MAX), 2342 ANDISMask = (Mask >> 16) & UINT16_MAX; 2343 2344 if (Use32BitInsts) { 2345 assert((ANDIMask != 0 || ANDISMask != 0) && 2346 "No set bits in mask when using 32-bit ands for 64-bit value"); 2347 2348 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) + 2349 (unsigned) (ANDISMask != 0) + 2350 (unsigned) (ANDIMask != 0 && ANDISMask != 0); 2351 2352 SDValue ANDIVal, ANDISVal; 2353 if (ANDIMask != 0) 2354 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64, 2355 ExtendToInt64(Res, dl), 2356 getI32Imm(ANDIMask, dl)), 2357 0); 2358 if (ANDISMask != 0) 2359 ANDISVal = 2360 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64, 2361 ExtendToInt64(Res, dl), 2362 getI32Imm(ANDISMask, dl)), 2363 0); 2364 2365 if (!ANDIVal) 2366 Res = ANDISVal; 2367 else if (!ANDISVal) 2368 Res = ANDIVal; 2369 else 2370 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2371 ExtendToInt64(ANDIVal, dl), ANDISVal), 0); 2372 } else { 2373 if (InstCnt) *InstCnt += selectI64ImmInstrCount(Mask) + /* and */ 1; 2374 2375 SDValue MaskVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0); 2376 Res = 2377 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64, 2378 ExtendToInt64(Res, dl), MaskVal), 0); 2379 } 2380 } 2381 2382 return Res.getNode(); 2383 } 2384 2385 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) { 2386 // Fill in BitGroups. 2387 collectBitGroups(LateMask); 2388 if (BitGroups.empty()) 2389 return nullptr; 2390 2391 // For 64-bit values, figure out when we can use 32-bit instructions. 2392 if (Bits.size() == 64) 2393 assignRepl32BitGroups(); 2394 2395 // Fill in ValueRotsVec. 
2396 collectValueRotInfo(); 2397 2398 if (Bits.size() == 32) { 2399 return Select32(N, LateMask, InstCnt); 2400 } else { 2401 assert(Bits.size() == 64 && "Not 64 bits here?"); 2402 return Select64(N, LateMask, InstCnt); 2403 } 2404 2405 return nullptr; 2406 } 2407 2408 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) { 2409 BitGroups.erase(remove_if(BitGroups, F), BitGroups.end()); 2410 } 2411 2412 SmallVector<ValueBit, 64> Bits; 2413 2414 bool NeedMask = false; 2415 SmallVector<unsigned, 64> RLAmt; 2416 2417 SmallVector<BitGroup, 16> BitGroups; 2418 2419 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots; 2420 SmallVector<ValueRotInfo, 16> ValueRotsVec; 2421 2422 SelectionDAG *CurDAG = nullptr; 2423 2424 public: 2425 BitPermutationSelector(SelectionDAG *DAG) 2426 : CurDAG(DAG) {} 2427 2428 // Here we try to match complex bit permutations into a set of 2429 // rotate-and-shift/shift/and/or instructions, using a set of heuristics 2430 // known to produce optimal code for common cases (like i32 byte swapping). 2431 SDNode *Select(SDNode *N) { 2432 Memoizer.clear(); 2433 auto Result = 2434 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits()); 2435 if (!Result.first) 2436 return nullptr; 2437 Bits = std::move(*Result.second); 2438 2439 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction" 2440 " selection for: "); 2441 LLVM_DEBUG(N->dump(CurDAG)); 2442 2443 // Fill it RLAmt and set NeedMask. 2444 computeRotationAmounts(); 2445 2446 if (!NeedMask) 2447 return Select(N, false); 2448 2449 // We currently have two techniques for handling results with zeros: early 2450 // masking (the default) and late masking. Late masking is sometimes more 2451 // efficient, but because the structure of the bit groups is different, it 2452 // is hard to tell without generating both and comparing the results. With 2453 // late masking, we ignore zeros in the resulting value when inserting each 2454 // set of bit groups, and then mask in the zeros at the end. With early 2455 // masking, we only insert the non-zero parts of the result at every step. 2456 2457 unsigned InstCnt = 0, InstCntLateMask = 0; 2458 LLVM_DEBUG(dbgs() << "\tEarly masking:\n"); 2459 SDNode *RN = Select(N, false, &InstCnt); 2460 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n"); 2461 2462 LLVM_DEBUG(dbgs() << "\tLate masking:\n"); 2463 SDNode *RNLM = Select(N, true, &InstCntLateMask); 2464 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask 2465 << " instructions\n"); 2466 2467 if (InstCnt <= InstCntLateMask) { 2468 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n"); 2469 return RN; 2470 } 2471 2472 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n"); 2473 return RNLM; 2474 } 2475 }; 2476 2477 class IntegerCompareEliminator { 2478 SelectionDAG *CurDAG; 2479 PPCDAGToDAGISel *S; 2480 // Conversion type for interpreting results of a 32-bit instruction as 2481 // a 64-bit value or vice versa. 2482 enum ExtOrTruncConversion { Ext, Trunc }; 2483 2484 // Modifiers to guide how an ISD::SETCC node's result is to be computed 2485 // in a GPR. 2486 // ZExtOrig - use the original condition code, zero-extend value 2487 // ZExtInvert - invert the condition code, zero-extend value 2488 // SExtOrig - use the original condition code, sign-extend value 2489 // SExtInvert - invert the condition code, sign-extend value 2490 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert }; 2491 2492 // Comparisons against zero to emit GPR code sequences for. 
Each of these 2493 // sequences may need to be emitted for two or more equivalent patterns. 2494 // For example (a >= 0) == (a > -1). The direction of the comparison (</>) 2495 // matters as well as the extension type: sext (-1/0), zext (1/0). 2496 // GEZExt - (zext (LHS >= 0)) 2497 // GESExt - (sext (LHS >= 0)) 2498 // LEZExt - (zext (LHS <= 0)) 2499 // LESExt - (sext (LHS <= 0)) 2500 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt }; 2501 2502 SDNode *tryEXTEND(SDNode *N); 2503 SDNode *tryLogicOpOfCompares(SDNode *N); 2504 SDValue computeLogicOpInGPR(SDValue LogicOp); 2505 SDValue signExtendInputIfNeeded(SDValue Input); 2506 SDValue zeroExtendInputIfNeeded(SDValue Input); 2507 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv); 2508 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, 2509 ZeroCompare CmpTy); 2510 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2511 int64_t RHSValue, SDLoc dl); 2512 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2513 int64_t RHSValue, SDLoc dl); 2514 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2515 int64_t RHSValue, SDLoc dl); 2516 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2517 int64_t RHSValue, SDLoc dl); 2518 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts); 2519 2520 public: 2521 IntegerCompareEliminator(SelectionDAG *DAG, 2522 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) { 2523 assert(CurDAG->getTargetLoweringInfo() 2524 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 && 2525 "Only expecting to use this on 64 bit targets."); 2526 } 2527 SDNode *Select(SDNode *N) { 2528 if (CmpInGPR == ICGPR_None) 2529 return nullptr; 2530 switch (N->getOpcode()) { 2531 default: break; 2532 case ISD::ZERO_EXTEND: 2533 if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 || 2534 CmpInGPR == ICGPR_SextI64) 2535 return nullptr; 2536 LLVM_FALLTHROUGH; 2537 case ISD::SIGN_EXTEND: 2538 if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 || 2539 CmpInGPR == ICGPR_ZextI64) 2540 return nullptr; 2541 return tryEXTEND(N); 2542 case ISD::AND: 2543 case ISD::OR: 2544 case ISD::XOR: 2545 return tryLogicOpOfCompares(N); 2546 } 2547 return nullptr; 2548 } 2549 }; 2550 2551 static bool isLogicOp(unsigned Opc) { 2552 return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; 2553 } 2554 // The obvious case for wanting to keep the value in a GPR. Namely, the 2555 // result of the comparison is actually needed in a GPR. 2556 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) { 2557 assert((N->getOpcode() == ISD::ZERO_EXTEND || 2558 N->getOpcode() == ISD::SIGN_EXTEND) && 2559 "Expecting a zero/sign extend node!"); 2560 SDValue WideRes; 2561 // If we are zero-extending the result of a logical operation on i1 2562 // values, we can keep the values in GPRs. 2563 if (isLogicOp(N->getOperand(0).getOpcode()) && 2564 N->getOperand(0).getValueType() == MVT::i1 && 2565 N->getOpcode() == ISD::ZERO_EXTEND) 2566 WideRes = computeLogicOpInGPR(N->getOperand(0)); 2567 else if (N->getOperand(0).getOpcode() != ISD::SETCC) 2568 return nullptr; 2569 else 2570 WideRes = 2571 getSETCCInGPR(N->getOperand(0), 2572 N->getOpcode() == ISD::SIGN_EXTEND ? 
2573 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig); 2574 2575 if (!WideRes) 2576 return nullptr; 2577 2578 SDLoc dl(N); 2579 bool Input32Bit = WideRes.getValueType() == MVT::i32; 2580 bool Output32Bit = N->getValueType(0) == MVT::i32; 2581 2582 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0; 2583 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1; 2584 2585 SDValue ConvOp = WideRes; 2586 if (Input32Bit != Output32Bit) 2587 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext : 2588 ExtOrTruncConversion::Trunc); 2589 return ConvOp.getNode(); 2590 } 2591 2592 // Attempt to perform logical operations on the results of comparisons while 2593 // keeping the values in GPRs. Without doing so, these would end up being 2594 // lowered to CR-logical operations which suffer from significant latency and 2595 // low ILP. 2596 SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) { 2597 if (N->getValueType(0) != MVT::i1) 2598 return nullptr; 2599 assert(isLogicOp(N->getOpcode()) && 2600 "Expected a logic operation on setcc results."); 2601 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0)); 2602 if (!LoweredLogical) 2603 return nullptr; 2604 2605 SDLoc dl(N); 2606 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8; 2607 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt; 2608 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32); 2609 SDValue LHS = LoweredLogical.getOperand(0); 2610 SDValue RHS = LoweredLogical.getOperand(1); 2611 SDValue WideOp; 2612 SDValue OpToConvToRecForm; 2613 2614 // Look through any 32-bit to 64-bit implicit extend nodes to find the 2615 // opcode that is input to the XORI. 2616 if (IsBitwiseNegate && 2617 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG) 2618 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1); 2619 else if (IsBitwiseNegate) 2620 // If the input to the XORI isn't an extension, that's what we're after. 2621 OpToConvToRecForm = LoweredLogical.getOperand(0); 2622 else 2623 // If this is not an XORI, it is a reg-reg logical op and we can convert 2624 // it to record-form. 2625 OpToConvToRecForm = LoweredLogical; 2626 2627 // Get the record-form version of the node we're looking to use to get the 2628 // CR result from. 2629 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode(); 2630 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc); 2631 2632 // Convert the right node to record-form. This is either the logical we're 2633 // looking at or it is the input node to the negation (if we're looking at 2634 // a bitwise negation). 2635 if (NewOpc != -1 && IsBitwiseNegate) { 2636 // The input to the XORI has a record-form. Use it. 2637 assert(LoweredLogical.getConstantOperandVal(1) == 1 && 2638 "Expected a PPC::XORI8 only for bitwise negation."); 2639 // Emit the record-form instruction. 2640 std::vector<SDValue> Ops; 2641 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++) 2642 Ops.push_back(OpToConvToRecForm.getOperand(i)); 2643 2644 WideOp = 2645 SDValue(CurDAG->getMachineNode(NewOpc, dl, 2646 OpToConvToRecForm.getValueType(), 2647 MVT::Glue, Ops), 0); 2648 } else { 2649 assert((NewOpc != -1 || !IsBitwiseNegate) && 2650 "No record form available for AND8/OR8/XOR8?"); 2651 WideOp = 2652 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? 
PPC::ANDI8_rec : NewOpc, 2653 dl, MVT::i64, MVT::Glue, LHS, RHS), 2654 0); 2655 } 2656 2657 // Select this node to a single bit from CR0 set by the record-form node 2658 // just created. For bitwise negation, use the EQ bit which is the equivalent 2659 // of negating the result (i.e. it is a bit set when the result of the 2660 // operation is zero). 2661 SDValue SRIdxVal = 2662 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32); 2663 SDValue CRBit = 2664 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2665 MVT::i1, CR0Reg, SRIdxVal, 2666 WideOp.getValue(1)), 0); 2667 return CRBit.getNode(); 2668 } 2669 2670 // Lower a logical operation on i1 values into a GPR sequence if possible. 2671 // The result can be kept in a GPR if requested. 2672 // Three types of inputs can be handled: 2673 // - SETCC 2674 // - TRUNCATE 2675 // - Logical operation (AND/OR/XOR) 2676 // There is also a special case that is handled (namely a complement operation 2677 // achieved with xor %a, -1). 2678 SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) { 2679 assert(isLogicOp(LogicOp.getOpcode()) && 2680 "Can only handle logic operations here."); 2681 assert(LogicOp.getValueType() == MVT::i1 && 2682 "Can only handle logic operations on i1 values here."); 2683 SDLoc dl(LogicOp); 2684 SDValue LHS, RHS; 2685 2686 // Special case: xor %a, -1 2687 bool IsBitwiseNegation = isBitwiseNot(LogicOp); 2688 2689 // Produces a GPR sequence for each operand of the binary logic operation. 2690 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates 2691 // the value in a GPR and for logic operations, it will recursively produce 2692 // a GPR sequence for the operation. 2693 auto getLogicOperand = [&] (SDValue Operand) -> SDValue { 2694 unsigned OperandOpcode = Operand.getOpcode(); 2695 if (OperandOpcode == ISD::SETCC) 2696 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig); 2697 else if (OperandOpcode == ISD::TRUNCATE) { 2698 SDValue InputOp = Operand.getOperand(0); 2699 EVT InVT = InputOp.getValueType(); 2700 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 : 2701 PPC::RLDICL, dl, InVT, InputOp, 2702 S->getI64Imm(0, dl), 2703 S->getI64Imm(63, dl)), 0); 2704 } else if (isLogicOp(OperandOpcode)) 2705 return computeLogicOpInGPR(Operand); 2706 return SDValue(); 2707 }; 2708 LHS = getLogicOperand(LogicOp.getOperand(0)); 2709 RHS = getLogicOperand(LogicOp.getOperand(1)); 2710 2711 // If a GPR sequence can't be produced for the LHS we can't proceed. 2712 // Not producing a GPR sequence for the RHS is only a problem if this isn't 2713 // a bitwise negation operation. 2714 if (!LHS || (!RHS && !IsBitwiseNegation)) 2715 return SDValue(); 2716 2717 NumLogicOpsOnComparison++; 2718 2719 // We will use the inputs as 64-bit values. 
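  // Note that addExtOrTrunc only re-tags a 32-bit value as a 64-bit one (or
  // the reverse); as described at its definition, it is a subregister
  // operation that generates no machine code, so the conversions below are
  // free.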
2720 if (LHS.getValueType() == MVT::i32) 2721 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext); 2722 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32) 2723 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext); 2724 2725 unsigned NewOpc; 2726 switch (LogicOp.getOpcode()) { 2727 default: llvm_unreachable("Unknown logic operation."); 2728 case ISD::AND: NewOpc = PPC::AND8; break; 2729 case ISD::OR: NewOpc = PPC::OR8; break; 2730 case ISD::XOR: NewOpc = PPC::XOR8; break; 2731 } 2732 2733 if (IsBitwiseNegation) { 2734 RHS = S->getI64Imm(1, dl); 2735 NewOpc = PPC::XORI8; 2736 } 2737 2738 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0); 2739 2740 } 2741 2742 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it. 2743 /// Otherwise just reinterpret it as a 64-bit value. 2744 /// Useful when emitting comparison code for 32-bit values without using 2745 /// the compare instruction (which only considers the lower 32-bits). 2746 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) { 2747 assert(Input.getValueType() == MVT::i32 && 2748 "Can only sign-extend 32-bit values here."); 2749 unsigned Opc = Input.getOpcode(); 2750 2751 // The value was sign extended and then truncated to 32-bits. No need to 2752 // sign extend it again. 2753 if (Opc == ISD::TRUNCATE && 2754 (Input.getOperand(0).getOpcode() == ISD::AssertSext || 2755 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND)) 2756 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2757 2758 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2759 // The input is a sign-extending load. All ppc sign-extending loads 2760 // sign-extend to the full 64-bits. 2761 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD) 2762 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2763 2764 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2765 // We don't sign-extend constants. 2766 if (InputConst) 2767 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2768 2769 SDLoc dl(Input); 2770 SignExtensionsAdded++; 2771 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl, 2772 MVT::i64, Input), 0); 2773 } 2774 2775 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it. 2776 /// Otherwise just reinterpret it as a 64-bit value. 2777 /// Useful when emitting comparison code for 32-bit values without using 2778 /// the compare instruction (which only considers the lower 32-bits). 2779 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) { 2780 assert(Input.getValueType() == MVT::i32 && 2781 "Can only zero-extend 32-bit values here."); 2782 unsigned Opc = Input.getOpcode(); 2783 2784 // The only condition under which we can omit the actual extend instruction: 2785 // - The value is a positive constant 2786 // - The value comes from a load that isn't a sign-extending load 2787 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext. 
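  // When none of these conditions hold, the fallback below emits a single
  // clear of the upper word: RLDICL_32_64 with a shift of 0 and a mask
  // beginning at bit 32, i.e. the canonical "zero-extend i32 to i64" idiom
  // (rldicl Rx, Ry, 0, 32).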
2788 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE && 2789 (Input.getOperand(0).getOpcode() == ISD::AssertZext || 2790 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND); 2791 if (IsTruncateOfZExt) 2792 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2793 2794 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input); 2795 if (InputConst && InputConst->getSExtValue() >= 0) 2796 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2797 2798 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input); 2799 // The input is a load that doesn't sign-extend (it will be zero-extended). 2800 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD) 2801 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext); 2802 2803 // None of the above, need to zero-extend. 2804 SDLoc dl(Input); 2805 ZeroExtensionsAdded++; 2806 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input, 2807 S->getI64Imm(0, dl), 2808 S->getI64Imm(32, dl)), 0); 2809 } 2810 2811 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of 2812 // course not actual zero/sign extensions that will generate machine code, 2813 // they're just a way to reinterpret a 32 bit value in a register as a 2814 // 64 bit value and vice-versa. 2815 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes, 2816 ExtOrTruncConversion Conv) { 2817 SDLoc dl(NatWidthRes); 2818 2819 // For reinterpreting 32-bit values as 64 bit values, we generate 2820 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1> 2821 if (Conv == ExtOrTruncConversion::Ext) { 2822 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0); 2823 SDValue SubRegIdx = 2824 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2825 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64, 2826 ImDef, NatWidthRes, SubRegIdx), 0); 2827 } 2828 2829 assert(Conv == ExtOrTruncConversion::Trunc && 2830 "Unknown convertion between 32 and 64 bit values."); 2831 // For reinterpreting 64-bit values as 32-bit values, we just need to 2832 // EXTRACT_SUBREG (i.e. extract the low word). 2833 SDValue SubRegIdx = 2834 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32); 2835 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32, 2836 NatWidthRes, SubRegIdx), 0); 2837 } 2838 2839 // Produce a GPR sequence for compound comparisons (<=, >=) against zero. 2840 // Handle both zero-extensions and sign-extensions. 2841 SDValue 2842 IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl, 2843 ZeroCompare CmpTy) { 2844 EVT InVT = LHS.getValueType(); 2845 bool Is32Bit = InVT == MVT::i32; 2846 SDValue ToExtend; 2847 2848 // Produce the value that needs to be either zero or sign extended. 2849 switch (CmpTy) { 2850 case ZeroCompare::GEZExt: 2851 case ZeroCompare::GESExt: 2852 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8, 2853 dl, InVT, LHS, LHS), 0); 2854 break; 2855 case ZeroCompare::LEZExt: 2856 case ZeroCompare::LESExt: { 2857 if (Is32Bit) { 2858 // Upper 32 bits cannot be undefined for this sequence. 
2859 LHS = signExtendInputIfNeeded(LHS); 2860 SDValue Neg = 2861 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); 2862 ToExtend = 2863 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 2864 Neg, S->getI64Imm(1, dl), 2865 S->getI64Imm(63, dl)), 0); 2866 } else { 2867 SDValue Addi = 2868 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS, 2869 S->getI64Imm(~0ULL, dl)), 0); 2870 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64, 2871 Addi, LHS), 0); 2872 } 2873 break; 2874 } 2875 } 2876 2877 // For 64-bit sequences, the extensions are the same for the GE/LE cases. 2878 if (!Is32Bit && 2879 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt)) 2880 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 2881 ToExtend, S->getI64Imm(1, dl), 2882 S->getI64Imm(63, dl)), 0); 2883 if (!Is32Bit && 2884 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt)) 2885 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend, 2886 S->getI64Imm(63, dl)), 0); 2887 2888 assert(Is32Bit && "Should have handled the 32-bit sequences above."); 2889 // For 32-bit sequences, the extensions differ between GE/LE cases. 2890 switch (CmpTy) { 2891 case ZeroCompare::GEZExt: { 2892 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl), 2893 S->getI32Imm(31, dl) }; 2894 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 2895 ShiftOps), 0); 2896 } 2897 case ZeroCompare::GESExt: 2898 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend, 2899 S->getI32Imm(31, dl)), 0); 2900 case ZeroCompare::LEZExt: 2901 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend, 2902 S->getI32Imm(1, dl)), 0); 2903 case ZeroCompare::LESExt: 2904 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend, 2905 S->getI32Imm(-1, dl)), 0); 2906 } 2907 2908 // The above case covers all the enumerators so it can't have a default clause 2909 // to avoid compiler warnings. 2910 llvm_unreachable("Unknown zero-comparison type."); 2911 } 2912 2913 /// Produces a zero-extended result of comparing two 32-bit values according to 2914 /// the passed condition code. 2915 SDValue 2916 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS, 2917 ISD::CondCode CC, 2918 int64_t RHSValue, SDLoc dl) { 2919 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || 2920 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Sext) 2921 return SDValue(); 2922 bool IsRHSZero = RHSValue == 0; 2923 bool IsRHSOne = RHSValue == 1; 2924 bool IsRHSNegOne = RHSValue == -1LL; 2925 switch (CC) { 2926 default: return SDValue(); 2927 case ISD::SETEQ: { 2928 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5) 2929 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5) 2930 SDValue Xor = IsRHSZero ? LHS : 2931 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2932 SDValue Clz = 2933 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2934 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), 2935 S->getI32Imm(31, dl) }; 2936 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 2937 ShiftOps), 0); 2938 } 2939 case ISD::SETNE: { 2940 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1) 2941 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1) 2942 SDValue Xor = IsRHSZero ? 
LHS : 2943 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0); 2944 SDValue Clz = 2945 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0); 2946 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), 2947 S->getI32Imm(31, dl) }; 2948 SDValue Shift = 2949 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0); 2950 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift, 2951 S->getI32Imm(1, dl)), 0); 2952 } 2953 case ISD::SETGE: { 2954 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1) 2955 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31) 2956 if(IsRHSZero) 2957 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); 2958 2959 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a) 2960 // by swapping inputs and falling through. 2961 std::swap(LHS, RHS); 2962 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 2963 IsRHSZero = RHSConst && RHSConst->isNullValue(); 2964 LLVM_FALLTHROUGH; 2965 } 2966 case ISD::SETLE: { 2967 if (CmpInGPR == ICGPR_NonExtIn) 2968 return SDValue(); 2969 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1) 2970 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1) 2971 if(IsRHSZero) { 2972 if (CmpInGPR == ICGPR_NonExtIn) 2973 return SDValue(); 2974 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); 2975 } 2976 2977 // The upper 32-bits of the register can't be undefined for this sequence. 2978 LHS = signExtendInputIfNeeded(LHS); 2979 RHS = signExtendInputIfNeeded(RHS); 2980 SDValue Sub = 2981 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); 2982 SDValue Shift = 2983 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub, 2984 S->getI64Imm(1, dl), S->getI64Imm(63, dl)), 2985 0); 2986 return 2987 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, 2988 MVT::i64, Shift, S->getI32Imm(1, dl)), 0); 2989 } 2990 case ISD::SETGT: { 2991 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63) 2992 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31) 2993 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63) 2994 // Handle SETLT -1 (which is equivalent to SETGE 0). 2995 if (IsRHSNegOne) 2996 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt); 2997 2998 if (IsRHSZero) { 2999 if (CmpInGPR == ICGPR_NonExtIn) 3000 return SDValue(); 3001 // The upper 32-bits of the register can't be undefined for this sequence. 3002 LHS = signExtendInputIfNeeded(LHS); 3003 RHS = signExtendInputIfNeeded(RHS); 3004 SDValue Neg = 3005 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0); 3006 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3007 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0); 3008 } 3009 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as 3010 // (%b < %a) by swapping inputs and falling through. 3011 std::swap(LHS, RHS); 3012 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3013 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3014 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; 3015 LLVM_FALLTHROUGH; 3016 } 3017 case ISD::SETLT: { 3018 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63) 3019 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1) 3020 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31) 3021 // Handle SETLT 1 (which is equivalent to SETLE 0). 
3022 if (IsRHSOne) { 3023 if (CmpInGPR == ICGPR_NonExtIn) 3024 return SDValue(); 3025 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); 3026 } 3027 3028 if (IsRHSZero) { 3029 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl), 3030 S->getI32Imm(31, dl) }; 3031 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, 3032 ShiftOps), 0); 3033 } 3034 3035 if (CmpInGPR == ICGPR_NonExtIn) 3036 return SDValue(); 3037 // The upper 32-bits of the register can't be undefined for this sequence. 3038 LHS = signExtendInputIfNeeded(LHS); 3039 RHS = signExtendInputIfNeeded(RHS); 3040 SDValue SUBFNode = 3041 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); 3042 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3043 SUBFNode, S->getI64Imm(1, dl), 3044 S->getI64Imm(63, dl)), 0); 3045 } 3046 case ISD::SETUGE: 3047 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1) 3048 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1) 3049 std::swap(LHS, RHS); 3050 LLVM_FALLTHROUGH; 3051 case ISD::SETULE: { 3052 if (CmpInGPR == ICGPR_NonExtIn) 3053 return SDValue(); 3054 // The upper 32-bits of the register can't be undefined for this sequence. 3055 LHS = zeroExtendInputIfNeeded(LHS); 3056 RHS = zeroExtendInputIfNeeded(RHS); 3057 SDValue Subtract = 3058 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0); 3059 SDValue SrdiNode = 3060 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3061 Subtract, S->getI64Imm(1, dl), 3062 S->getI64Imm(63, dl)), 0); 3063 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode, 3064 S->getI32Imm(1, dl)), 0); 3065 } 3066 case ISD::SETUGT: 3067 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63) 3068 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63) 3069 std::swap(LHS, RHS); 3070 LLVM_FALLTHROUGH; 3071 case ISD::SETULT: { 3072 if (CmpInGPR == ICGPR_NonExtIn) 3073 return SDValue(); 3074 // The upper 32-bits of the register can't be undefined for this sequence. 3075 LHS = zeroExtendInputIfNeeded(LHS); 3076 RHS = zeroExtendInputIfNeeded(RHS); 3077 SDValue Subtract = 3078 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0); 3079 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3080 Subtract, S->getI64Imm(1, dl), 3081 S->getI64Imm(63, dl)), 0); 3082 } 3083 } 3084 } 3085 3086 /// Produces a sign-extended result of comparing two 32-bit values according to 3087 /// the passed condition code. 3088 SDValue 3089 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS, 3090 ISD::CondCode CC, 3091 int64_t RHSValue, SDLoc dl) { 3092 if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 || 3093 CmpInGPR == ICGPR_ZextI64 || CmpInGPR == ICGPR_Zext) 3094 return SDValue(); 3095 bool IsRHSZero = RHSValue == 0; 3096 bool IsRHSOne = RHSValue == 1; 3097 bool IsRHSNegOne = RHSValue == -1LL; 3098 3099 switch (CC) { 3100 default: return SDValue(); 3101 case ISD::SETEQ: { 3102 // (sext (setcc %a, %b, seteq)) -> 3103 // (ashr (shl (ctlz (xor %a, %b)), 58), 63) 3104 // (sext (setcc %a, 0, seteq)) -> 3105 // (ashr (shl (ctlz %a), 58), 63) 3106 SDValue CountInput = IsRHSZero ? 
      LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Cntlzw =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
    SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
                         S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Slwi =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
  }
  case ISD::SETNE: {
    // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
    // flip the bit, finally take 2's complement.
    // (sext (setcc %a, %b, setne)) ->
    //   (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
    // Same as above, but the first xor is not needed.
    // (sext (setcc %a, 0, setne)) ->
    //   (neg (xor (lshr (ctlz %a), 5), 1))
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
    SDValue ShiftOps[] =
      { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
    SDValue Xori =
      SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
                                     S->getI32Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
  }
  case ISD::SETGE: {
    // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, 0, setge))  -> (ashr (~ %a), 31)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);

    // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
    // by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
    // (sext (setcc %a, 0, setle))  -> (add (lshr (- %a), 63), -1)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);

    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 0);
    SDValue Srdi =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     SUBFNode, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETGT: {
    // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
    // (sext (setcc %a, 0, setgt))  -> (ashr (- %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      // The upper 32-bits of the register can't be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      RHS = signExtendInputIfNeeded(RHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
                                            S->getI64Imm(63, dl)), 0);
    }
    // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
    // (%b < %a) by swapping inputs and falling through.
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
    // (sext (setcc %a, 1, setlt))  -> (add (lshr (- %a), 63), -1)
    // (sext (setcc %a, 0, setlt))  -> (ashr %a, 31)
    if (IsRHSOne) {
      if (CmpInGPR == ICGPR_NonExtIn)
        return SDValue();
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    }
    if (IsRHSZero)
      return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
                                            S->getI32Imm(31, dl)), 0);

    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = signExtendInputIfNeeded(LHS);
    RHS = signExtendInputIfNeeded(RHS);
    SDValue SUBFNode =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          SUBFNode, S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETUGE:
    // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
    // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Shift =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
                                     S->getI32Imm(1, dl), S->getI32Imm(63, dl)),
              0);
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
                                          S->getI32Imm(-1, dl)), 0);
  }
  case ISD::SETUGT:
    // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
    // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    if (CmpInGPR == ICGPR_NonExtIn)
      return SDValue();
    // The upper 32-bits of the register can't be undefined for this sequence.
    LHS = zeroExtendInputIfNeeded(LHS);
    RHS = zeroExtendInputIfNeeded(RHS);
    SDValue Subtract =
      SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                          Subtract, S->getI64Imm(63, dl)), 0);
  }
  }
}

/// Produces a zero-extended result of comparing two 64-bit values according to
/// the passed condition code.
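/// As an illustrative sketch (not normative; register numbers are arbitrary),
/// the SETEQ case below is expected to select to roughly:
///   xor    rT, rA, rB      # rT == 0 iff rA == rB
///   cntlzd rT, rT          # yields 64 iff rT == 0, otherwise a value < 64
///   rldicl rD, rT, 58, 63  # i.e. rT >> 6, so rD is 1 iff rA == rB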
SDValue
IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Sext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
    // (zext (setcc %a, 0, seteq))  -> (lshr (ctlz %a), 6)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Clz =
      SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
                                          S->getI64Imm(58, dl),
                                          S->getI64Imm(63, dl)), 0);
  }
  case ISD::SETNE: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue AC =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
                                          Xor, AC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (zext (setcc %a, %b, setge)) ->
    //   (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
    // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setle)) ->
    //   (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
    // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                          ShiftR, ShiftL, SubtractCarry), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (zext (setcc %a, %b, setgt)) ->
    //   (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
    if (IsRHSZero) {
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
      return
SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor, 3342 S->getI64Imm(1, dl), 3343 S->getI64Imm(63, dl)), 0); 3344 } 3345 std::swap(LHS, RHS); 3346 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3347 IsRHSZero = RHSConst && RHSConst->isNullValue(); 3348 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1; 3349 LLVM_FALLTHROUGH; 3350 } 3351 case ISD::SETLT: { 3352 // {subc.reg, subc.CA} = (subcarry %a, %b) 3353 // (zext (setcc %a, %b, setlt)) -> 3354 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1) 3355 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63) 3356 if (IsRHSOne) 3357 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt); 3358 if (IsRHSZero) 3359 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS, 3360 S->getI64Imm(1, dl), 3361 S->getI64Imm(63, dl)), 0); 3362 SDValue SRADINode = 3363 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, 3364 LHS, S->getI64Imm(63, dl)), 0); 3365 SDValue SRDINode = 3366 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, 3367 RHS, S->getI64Imm(1, dl), 3368 S->getI64Imm(63, dl)), 0); 3369 SDValue SUBFC8Carry = 3370 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3371 RHS, LHS), 1); 3372 SDValue ADDE8Node = 3373 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue, 3374 SRDINode, SRADINode, SUBFC8Carry), 0); 3375 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, 3376 ADDE8Node, S->getI64Imm(1, dl)), 0); 3377 } 3378 case ISD::SETUGE: 3379 // {subc.reg, subc.CA} = (subcarry %a, %b) 3380 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1) 3381 std::swap(LHS, RHS); 3382 LLVM_FALLTHROUGH; 3383 case ISD::SETULE: { 3384 // {subc.reg, subc.CA} = (subcarry %b, %a) 3385 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1) 3386 SDValue SUBFC8Carry = 3387 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3388 LHS, RHS), 1); 3389 SDValue SUBFE8Node = 3390 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, 3391 LHS, LHS, SUBFC8Carry), 0); 3392 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, 3393 SUBFE8Node, S->getI64Imm(1, dl)), 0); 3394 } 3395 case ISD::SETUGT: 3396 // {subc.reg, subc.CA} = (subcarry %b, %a) 3397 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA) 3398 std::swap(LHS, RHS); 3399 LLVM_FALLTHROUGH; 3400 case ISD::SETULT: { 3401 // {subc.reg, subc.CA} = (subcarry %a, %b) 3402 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA) 3403 SDValue SubtractCarry = 3404 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue, 3405 RHS, LHS), 1); 3406 SDValue ExtSub = 3407 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, 3408 LHS, LHS, SubtractCarry), 0); 3409 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, 3410 ExtSub), 0); 3411 } 3412 } 3413 } 3414 3415 /// Produces a sign-extended result of comparing two 64-bit values according to 3416 /// the passed condition code. 
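/// As an illustrative sketch (not normative; register numbers are arbitrary),
/// the SETEQ case below is expected to select to roughly:
///   xor   rT, rA, rB   # rT == 0 iff rA == rB
///   addic rT, rT, -1   # the carry is set iff rT was non-zero
///   subfe rD, rT, rT   # rD = CA - 1, i.e. -1 if equal, 0 otherwise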
SDValue
IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
                                              ISD::CondCode CC,
                                              int64_t RHSValue, SDLoc dl) {
  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
      CmpInGPR == ICGPR_ZextI32 || CmpInGPR == ICGPR_Zext)
    return SDValue();
  bool IsRHSZero = RHSValue == 0;
  bool IsRHSOne = RHSValue == 1;
  bool IsRHSNegOne = RHSValue == -1LL;
  switch (CC) {
  default: return SDValue();
  case ISD::SETEQ: {
    // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
    // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
    // {addcz.reg, addcz.CA} = (addcarry %a, -1)
    // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
    SDValue AddInput = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue Addic =
      SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
                                     AddInput, S->getI32Imm(~0U, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
                                          Addic, Addic.getValue(1)), 0);
  }
  case ISD::SETNE: {
    // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
    // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
    // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
    // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
    SDValue Xor = IsRHSZero ? LHS :
      SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
    SDValue SC =
      SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
                                     Xor, S->getI32Imm(0, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
                                          SC, SC.getValue(1)), 0);
  }
  case ISD::SETGE: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setge)) ->
    //   (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
    // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setle)) ->
    //   (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
    // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
    if (IsRHSZero)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    SDValue ShiftR =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
                                     S->getI64Imm(63, dl)), 0);
    SDValue ShiftL =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
                                     S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue Adde =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
                                     ShiftR, ShiftL, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
  }
  case ISD::SETGT: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setgt)) ->
    //   -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
    if (IsRHSNegOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
    if (IsRHSZero) {
      SDValue Add =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(-1, dl)), 0);
      SDValue Nor =
        SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
                                            S->getI64Imm(63, dl)), 0);
    }
    std::swap(LHS, RHS);
    ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
    IsRHSZero = RHSConst && RHSConst->isNullValue();
    IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
    LLVM_FALLTHROUGH;
  }
  case ISD::SETLT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setlt)) ->
    //   -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
    // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
    if (IsRHSOne)
      return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
    if (IsRHSZero) {
      return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
                                            S->getI64Imm(63, dl)), 0);
    }
    SDValue SRADINode =
      SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
                                     LHS, S->getI64Imm(63, dl)), 0);
    SDValue SRDINode =
      SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                     RHS, S->getI64Imm(1, dl),
                                     S->getI64Imm(63, dl)), 0);
    SDValue SUBFC8Carry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    SDValue ADDE8Node =
      SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
                                     SRDINode, SRADINode, SUBFC8Carry), 0);
    SDValue XORI8Node =
      SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
                                     ADDE8Node, S->getI64Imm(1, dl)), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
                                          XORI8Node), 0);
  }
  case ISD::SETUGE:
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULE: {
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
    SDValue SubtractCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     LHS, RHS), 1);
    SDValue ExtSub =
      SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
                                     LHS, SubtractCarry), 0);
    return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
                                          ExtSub, ExtSub), 0);
  }
  case ISD::SETUGT:
    // {subc.reg, subc.CA} = (subcarry %b, %a)
    // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ISD::SETULT: {
    // {subc.reg, subc.CA} = (subcarry %a, %b)
    // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
    SDValue SubCarry =
      SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
                                     RHS, LHS), 1);
    return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
                                          LHS, LHS, SubCarry), 0);
  }
  }
}

/// Do all uses of this SDValue need the result in a GPR?
/// This is meant to be used on values that have type i1 since
/// it is somewhat meaningless to ask if values of other types
/// should be kept in GPRs.
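/// For example, a compare whose only users are zero/sign extensions, selects,
/// or logical operations qualifies, while one that also feeds a conditional
/// branch does not (that use wants the result in a condition register).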
3579 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) { 3580 assert(Compare.getOpcode() == ISD::SETCC && 3581 "An ISD::SETCC node required here."); 3582 3583 // For values that have a single use, the caller should obviously already have 3584 // checked if that use is an extending use. We check the other uses here. 3585 if (Compare.hasOneUse()) 3586 return true; 3587 // We want the value in a GPR if it is being extended, used for a select, or 3588 // used in logical operations. 3589 for (auto CompareUse : Compare.getNode()->uses()) 3590 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND && 3591 CompareUse->getOpcode() != ISD::ZERO_EXTEND && 3592 CompareUse->getOpcode() != ISD::SELECT && 3593 !isLogicOp(CompareUse->getOpcode())) { 3594 OmittedForNonExtendUses++; 3595 return false; 3596 } 3597 return true; 3598 } 3599 3600 /// Returns an equivalent of a SETCC node but with the result the same width as 3601 /// the inputs. This can also be used for SELECT_CC if either the true or false 3602 /// values is a power of two while the other is zero. 3603 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare, 3604 SetccInGPROpts ConvOpts) { 3605 assert((Compare.getOpcode() == ISD::SETCC || 3606 Compare.getOpcode() == ISD::SELECT_CC) && 3607 "An ISD::SETCC node required here."); 3608 3609 // Don't convert this comparison to a GPR sequence because there are uses 3610 // of the i1 result (i.e. uses that require the result in the CR). 3611 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG)) 3612 return SDValue(); 3613 3614 SDValue LHS = Compare.getOperand(0); 3615 SDValue RHS = Compare.getOperand(1); 3616 3617 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC. 3618 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2; 3619 ISD::CondCode CC = 3620 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get(); 3621 EVT InputVT = LHS.getValueType(); 3622 if (InputVT != MVT::i32 && InputVT != MVT::i64) 3623 return SDValue(); 3624 3625 if (ConvOpts == SetccInGPROpts::ZExtInvert || 3626 ConvOpts == SetccInGPROpts::SExtInvert) 3627 CC = ISD::getSetCCInverse(CC, InputVT); 3628 3629 bool Inputs32Bit = InputVT == MVT::i32; 3630 3631 SDLoc dl(Compare); 3632 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS); 3633 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX; 3634 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig || 3635 ConvOpts == SetccInGPROpts::SExtInvert; 3636 3637 if (IsSext && Inputs32Bit) 3638 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3639 else if (Inputs32Bit) 3640 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3641 else if (IsSext) 3642 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl); 3643 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl); 3644 } 3645 3646 } // end anonymous namespace 3647 3648 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) { 3649 if (N->getValueType(0) != MVT::i32 && 3650 N->getValueType(0) != MVT::i64) 3651 return false; 3652 3653 // This optimization will emit code that assumes 64-bit registers 3654 // so we don't want to run it in 32-bit mode. Also don't run it 3655 // on functions that are not to be optimized. 
3656 if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64()) 3657 return false; 3658 3659 switch (N->getOpcode()) { 3660 default: break; 3661 case ISD::ZERO_EXTEND: 3662 case ISD::SIGN_EXTEND: 3663 case ISD::AND: 3664 case ISD::OR: 3665 case ISD::XOR: { 3666 IntegerCompareEliminator ICmpElim(CurDAG, this); 3667 if (SDNode *New = ICmpElim.Select(N)) { 3668 ReplaceNode(N, New); 3669 return true; 3670 } 3671 } 3672 } 3673 return false; 3674 } 3675 3676 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) { 3677 if (N->getValueType(0) != MVT::i32 && 3678 N->getValueType(0) != MVT::i64) 3679 return false; 3680 3681 if (!UseBitPermRewriter) 3682 return false; 3683 3684 switch (N->getOpcode()) { 3685 default: break; 3686 case ISD::ROTL: 3687 case ISD::SHL: 3688 case ISD::SRL: 3689 case ISD::AND: 3690 case ISD::OR: { 3691 BitPermutationSelector BPS(CurDAG); 3692 if (SDNode *New = BPS.Select(N)) { 3693 ReplaceNode(N, New); 3694 return true; 3695 } 3696 return false; 3697 } 3698 } 3699 3700 return false; 3701 } 3702 3703 /// SelectCC - Select a comparison of the specified values with the specified 3704 /// condition code, returning the CR# of the expression. 3705 SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, 3706 const SDLoc &dl) { 3707 // Always select the LHS. 3708 unsigned Opc; 3709 3710 if (LHS.getValueType() == MVT::i32) { 3711 unsigned Imm; 3712 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 3713 if (isInt32Immediate(RHS, Imm)) { 3714 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 3715 if (isUInt<16>(Imm)) 3716 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 3717 getI32Imm(Imm & 0xFFFF, dl)), 3718 0); 3719 // If this is a 16-bit signed immediate, fold it. 3720 if (isInt<16>((int)Imm)) 3721 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 3722 getI32Imm(Imm & 0xFFFF, dl)), 3723 0); 3724 3725 // For non-equality comparisons, the default code would materialize the 3726 // constant, then compare against it, like this: 3727 // lis r2, 4660 3728 // ori r2, r2, 22136 3729 // cmpw cr0, r3, r2 3730 // Since we are just comparing for equality, we can emit this instead: 3731 // xoris r0,r3,0x1234 3732 // cmplwi cr0,r0,0x5678 3733 // beq cr0,L6 3734 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS, 3735 getI32Imm(Imm >> 16, dl)), 0); 3736 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor, 3737 getI32Imm(Imm & 0xFFFF, dl)), 0); 3738 } 3739 Opc = PPC::CMPLW; 3740 } else if (ISD::isUnsignedIntSetCC(CC)) { 3741 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm)) 3742 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS, 3743 getI32Imm(Imm & 0xFFFF, dl)), 0); 3744 Opc = PPC::CMPLW; 3745 } else { 3746 int16_t SImm; 3747 if (isIntS16Immediate(RHS, SImm)) 3748 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS, 3749 getI32Imm((int)SImm & 0xFFFF, 3750 dl)), 3751 0); 3752 Opc = PPC::CMPW; 3753 } 3754 } else if (LHS.getValueType() == MVT::i64) { 3755 uint64_t Imm; 3756 if (CC == ISD::SETEQ || CC == ISD::SETNE) { 3757 if (isInt64Immediate(RHS.getNode(), Imm)) { 3758 // SETEQ/SETNE comparison with 16-bit immediate, fold it. 3759 if (isUInt<16>(Imm)) 3760 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 3761 getI32Imm(Imm & 0xFFFF, dl)), 3762 0); 3763 // If this is a 16-bit signed immediate, fold it. 
3764 if (isInt<16>(Imm)) 3765 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 3766 getI32Imm(Imm & 0xFFFF, dl)), 3767 0); 3768 3769 // For non-equality comparisons, the default code would materialize the 3770 // constant, then compare against it, like this: 3771 // lis r2, 4660 3772 // ori r2, r2, 22136 3773 // cmpd cr0, r3, r2 3774 // Since we are just comparing for equality, we can emit this instead: 3775 // xoris r0,r3,0x1234 3776 // cmpldi cr0,r0,0x5678 3777 // beq cr0,L6 3778 if (isUInt<32>(Imm)) { 3779 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS, 3780 getI64Imm(Imm >> 16, dl)), 0); 3781 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor, 3782 getI64Imm(Imm & 0xFFFF, dl)), 3783 0); 3784 } 3785 } 3786 Opc = PPC::CMPLD; 3787 } else if (ISD::isUnsignedIntSetCC(CC)) { 3788 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm)) 3789 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS, 3790 getI64Imm(Imm & 0xFFFF, dl)), 0); 3791 Opc = PPC::CMPLD; 3792 } else { 3793 int16_t SImm; 3794 if (isIntS16Immediate(RHS, SImm)) 3795 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS, 3796 getI64Imm(SImm & 0xFFFF, dl)), 3797 0); 3798 Opc = PPC::CMPD; 3799 } 3800 } else if (LHS.getValueType() == MVT::f32) { 3801 if (PPCSubTarget->hasSPE()) { 3802 switch (CC) { 3803 default: 3804 case ISD::SETEQ: 3805 case ISD::SETNE: 3806 Opc = PPC::EFSCMPEQ; 3807 break; 3808 case ISD::SETLT: 3809 case ISD::SETGE: 3810 case ISD::SETOLT: 3811 case ISD::SETOGE: 3812 case ISD::SETULT: 3813 case ISD::SETUGE: 3814 Opc = PPC::EFSCMPLT; 3815 break; 3816 case ISD::SETGT: 3817 case ISD::SETLE: 3818 case ISD::SETOGT: 3819 case ISD::SETOLE: 3820 case ISD::SETUGT: 3821 case ISD::SETULE: 3822 Opc = PPC::EFSCMPGT; 3823 break; 3824 } 3825 } else 3826 Opc = PPC::FCMPUS; 3827 } else if (LHS.getValueType() == MVT::f64) { 3828 if (PPCSubTarget->hasSPE()) { 3829 switch (CC) { 3830 default: 3831 case ISD::SETEQ: 3832 case ISD::SETNE: 3833 Opc = PPC::EFDCMPEQ; 3834 break; 3835 case ISD::SETLT: 3836 case ISD::SETGE: 3837 case ISD::SETOLT: 3838 case ISD::SETOGE: 3839 case ISD::SETULT: 3840 case ISD::SETUGE: 3841 Opc = PPC::EFDCMPLT; 3842 break; 3843 case ISD::SETGT: 3844 case ISD::SETLE: 3845 case ISD::SETOGT: 3846 case ISD::SETOLE: 3847 case ISD::SETUGT: 3848 case ISD::SETULE: 3849 Opc = PPC::EFDCMPGT; 3850 break; 3851 } 3852 } else 3853 Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD; 3854 } else { 3855 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!"); 3856 assert(PPCSubTarget->hasVSX() && "__float128 requires VSX"); 3857 Opc = PPC::XSCMPUQP; 3858 } 3859 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0); 3860 } 3861 3862 static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, 3863 const PPCSubtarget *Subtarget) { 3864 // For SPE instructions, the result is in GT bit of the CR 3865 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint(); 3866 3867 switch (CC) { 3868 case ISD::SETUEQ: 3869 case ISD::SETONE: 3870 case ISD::SETOLE: 3871 case ISD::SETOGE: 3872 llvm_unreachable("Should be lowered by legalize!"); 3873 default: llvm_unreachable("Unknown condition!"); 3874 case ISD::SETOEQ: 3875 case ISD::SETEQ: 3876 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ; 3877 case ISD::SETUNE: 3878 case ISD::SETNE: 3879 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE; 3880 case ISD::SETOLT: 3881 case ISD::SETLT: 3882 return UseSPE ? 
PPC::PRED_GT : PPC::PRED_LT; 3883 case ISD::SETULE: 3884 case ISD::SETLE: 3885 return PPC::PRED_LE; 3886 case ISD::SETOGT: 3887 case ISD::SETGT: 3888 return PPC::PRED_GT; 3889 case ISD::SETUGE: 3890 case ISD::SETGE: 3891 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE; 3892 case ISD::SETO: return PPC::PRED_NU; 3893 case ISD::SETUO: return PPC::PRED_UN; 3894 // These two are invalid for floating point. Assume we have int. 3895 case ISD::SETULT: return PPC::PRED_LT; 3896 case ISD::SETUGT: return PPC::PRED_GT; 3897 } 3898 } 3899 3900 /// getCRIdxForSetCC - Return the index of the condition register field 3901 /// associated with the SetCC condition, and whether or not the field is 3902 /// treated as inverted. That is, lt = 0; ge = 0 inverted. 3903 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) { 3904 Invert = false; 3905 switch (CC) { 3906 default: llvm_unreachable("Unknown condition!"); 3907 case ISD::SETOLT: 3908 case ISD::SETLT: return 0; // Bit #0 = SETOLT 3909 case ISD::SETOGT: 3910 case ISD::SETGT: return 1; // Bit #1 = SETOGT 3911 case ISD::SETOEQ: 3912 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ 3913 case ISD::SETUO: return 3; // Bit #3 = SETUO 3914 case ISD::SETUGE: 3915 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE 3916 case ISD::SETULE: 3917 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE 3918 case ISD::SETUNE: 3919 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE 3920 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO 3921 case ISD::SETUEQ: 3922 case ISD::SETOGE: 3923 case ISD::SETOLE: 3924 case ISD::SETONE: 3925 llvm_unreachable("Invalid branch code: should be expanded by legalize"); 3926 // These are invalid for floating point. Assume integer. 3927 case ISD::SETULT: return 0; 3928 case ISD::SETUGT: return 1; 3929 } 3930 } 3931 3932 // getVCmpInst: return the vector compare instruction for the specified 3933 // vector type and condition code. Since this is for altivec specific code, 3934 // only support the altivec types (v16i8, v8i16, v4i32, v2i64, and v4f32). 3935 static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, 3936 bool HasVSX, bool &Swap, bool &Negate) { 3937 Swap = false; 3938 Negate = false; 3939 3940 if (VecVT.isFloatingPoint()) { 3941 /* Handle some cases by swapping input operands. */ 3942 switch (CC) { 3943 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break; 3944 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 3945 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break; 3946 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break; 3947 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 3948 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break; 3949 default: break; 3950 } 3951 /* Handle some cases by negating the result. */ 3952 switch (CC) { 3953 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 3954 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break; 3955 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break; 3956 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break; 3957 default: break; 3958 } 3959 /* We have instructions implementing the remaining cases. */ 3960 switch (CC) { 3961 case ISD::SETEQ: 3962 case ISD::SETOEQ: 3963 if (VecVT == MVT::v4f32) 3964 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP; 3965 else if (VecVT == MVT::v2f64) 3966 return PPC::XVCMPEQDP; 3967 break; 3968 case ISD::SETGT: 3969 case ISD::SETOGT: 3970 if (VecVT == MVT::v4f32) 3971 return HasVSX ? 
PPC::XVCMPGTSP : PPC::VCMPGTFP; 3972 else if (VecVT == MVT::v2f64) 3973 return PPC::XVCMPGTDP; 3974 break; 3975 case ISD::SETGE: 3976 case ISD::SETOGE: 3977 if (VecVT == MVT::v4f32) 3978 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP; 3979 else if (VecVT == MVT::v2f64) 3980 return PPC::XVCMPGEDP; 3981 break; 3982 default: 3983 break; 3984 } 3985 llvm_unreachable("Invalid floating-point vector compare condition"); 3986 } else { 3987 /* Handle some cases by swapping input operands. */ 3988 switch (CC) { 3989 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break; 3990 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break; 3991 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break; 3992 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break; 3993 default: break; 3994 } 3995 /* Handle some cases by negating the result. */ 3996 switch (CC) { 3997 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break; 3998 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break; 3999 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break; 4000 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break; 4001 default: break; 4002 } 4003 /* We have instructions implementing the remaining cases. */ 4004 switch (CC) { 4005 case ISD::SETEQ: 4006 case ISD::SETUEQ: 4007 if (VecVT == MVT::v16i8) 4008 return PPC::VCMPEQUB; 4009 else if (VecVT == MVT::v8i16) 4010 return PPC::VCMPEQUH; 4011 else if (VecVT == MVT::v4i32) 4012 return PPC::VCMPEQUW; 4013 else if (VecVT == MVT::v2i64) 4014 return PPC::VCMPEQUD; 4015 break; 4016 case ISD::SETGT: 4017 if (VecVT == MVT::v16i8) 4018 return PPC::VCMPGTSB; 4019 else if (VecVT == MVT::v8i16) 4020 return PPC::VCMPGTSH; 4021 else if (VecVT == MVT::v4i32) 4022 return PPC::VCMPGTSW; 4023 else if (VecVT == MVT::v2i64) 4024 return PPC::VCMPGTSD; 4025 break; 4026 case ISD::SETUGT: 4027 if (VecVT == MVT::v16i8) 4028 return PPC::VCMPGTUB; 4029 else if (VecVT == MVT::v8i16) 4030 return PPC::VCMPGTUH; 4031 else if (VecVT == MVT::v4i32) 4032 return PPC::VCMPGTUW; 4033 else if (VecVT == MVT::v2i64) 4034 return PPC::VCMPGTUD; 4035 break; 4036 default: 4037 break; 4038 } 4039 llvm_unreachable("Invalid integer vector compare condition"); 4040 } 4041 } 4042 4043 bool PPCDAGToDAGISel::trySETCC(SDNode *N) { 4044 SDLoc dl(N); 4045 unsigned Imm; 4046 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); 4047 EVT PtrVT = 4048 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout()); 4049 bool isPPC64 = (PtrVT == MVT::i64); 4050 4051 if (!PPCSubTarget->useCRBits() && 4052 isInt32Immediate(N->getOperand(1), Imm)) { 4053 // We can codegen setcc op, imm very efficiently compared to a brcond. 4054 // Check for those cases here. 
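  // As an illustrative sketch (register numbers are arbitrary), the SETEQ
  // with zero case below is expected to become roughly:
  //   cntlzw rT, rA            # yields 32 iff rA == 0
  //   rlwinm rD, rT, 27, 5, 31 # i.e. rT >> 5, so rD is 1 iff rA == 0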
4055 // setcc op, 0 4056 if (Imm == 0) { 4057 SDValue Op = N->getOperand(0); 4058 switch (CC) { 4059 default: break; 4060 case ISD::SETEQ: { 4061 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0); 4062 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl), 4063 getI32Imm(31, dl) }; 4064 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4065 return true; 4066 } 4067 case ISD::SETNE: { 4068 if (isPPC64) break; 4069 SDValue AD = 4070 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4071 Op, getI32Imm(~0U, dl)), 0); 4072 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1)); 4073 return true; 4074 } 4075 case ISD::SETLT: { 4076 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 4077 getI32Imm(31, dl) }; 4078 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4079 return true; 4080 } 4081 case ISD::SETGT: { 4082 SDValue T = 4083 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0); 4084 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0); 4085 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl), 4086 getI32Imm(31, dl) }; 4087 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4088 return true; 4089 } 4090 } 4091 } else if (Imm == ~0U) { // setcc op, -1 4092 SDValue Op = N->getOperand(0); 4093 switch (CC) { 4094 default: break; 4095 case ISD::SETEQ: 4096 if (isPPC64) break; 4097 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4098 Op, getI32Imm(1, dl)), 0); 4099 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, 4100 SDValue(CurDAG->getMachineNode(PPC::LI, dl, 4101 MVT::i32, 4102 getI32Imm(0, dl)), 4103 0), Op.getValue(1)); 4104 return true; 4105 case ISD::SETNE: { 4106 if (isPPC64) break; 4107 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0); 4108 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue, 4109 Op, getI32Imm(~0U, dl)); 4110 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op, 4111 SDValue(AD, 1)); 4112 return true; 4113 } 4114 case ISD::SETLT: { 4115 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op, 4116 getI32Imm(1, dl)), 0); 4117 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD, 4118 Op), 0); 4119 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl), 4120 getI32Imm(31, dl) }; 4121 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4122 return true; 4123 } 4124 case ISD::SETGT: { 4125 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl), 4126 getI32Imm(31, dl) }; 4127 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 4128 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl)); 4129 return true; 4130 } 4131 } 4132 } 4133 } 4134 4135 SDValue LHS = N->getOperand(0); 4136 SDValue RHS = N->getOperand(1); 4137 4138 // Altivec Vector compare instructions do not set any CR register by default and 4139 // vector compare operations return the same type as the operands. 4140 if (LHS.getValueType().isVector()) { 4141 if (PPCSubTarget->hasQPX() || PPCSubTarget->hasSPE()) 4142 return false; 4143 4144 EVT VecVT = LHS.getValueType(); 4145 bool Swap, Negate; 4146 unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC, 4147 PPCSubTarget->hasVSX(), Swap, Negate); 4148 if (Swap) 4149 std::swap(LHS, RHS); 4150 4151 EVT ResVT = VecVT.changeVectorElementTypeToInteger(); 4152 if (Negate) { 4153 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0); 4154 CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? 
PPC::XXLNOR : PPC::VNOR, 4155 ResVT, VCmp, VCmp); 4156 return true; 4157 } 4158 4159 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS); 4160 return true; 4161 } 4162 4163 if (PPCSubTarget->useCRBits()) 4164 return false; 4165 4166 bool Inv; 4167 unsigned Idx = getCRIdxForSetCC(CC, Inv); 4168 SDValue CCReg = SelectCC(LHS, RHS, CC, dl); 4169 SDValue IntCR; 4170 4171 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that 4172 // The correct compare instruction is already set by SelectCC() 4173 if (PPCSubTarget->hasSPE() && LHS.getValueType().isFloatingPoint()) { 4174 Idx = 1; 4175 } 4176 4177 // Force the ccreg into CR7. 4178 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32); 4179 4180 SDValue InFlag(nullptr, 0); // Null incoming flag value. 4181 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg, 4182 InFlag).getValue(1); 4183 4184 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg, 4185 CCReg), 0); 4186 4187 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl), 4188 getI32Imm(31, dl), getI32Imm(31, dl) }; 4189 if (!Inv) { 4190 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4191 return true; 4192 } 4193 4194 // Get the specified bit. 4195 SDValue Tmp = 4196 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0); 4197 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl)); 4198 return true; 4199 } 4200 4201 /// Does this node represent a load/store node whose address can be represented 4202 /// with a register plus an immediate that's a multiple of \p Val: 4203 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const { 4204 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N); 4205 StoreSDNode *STN = dyn_cast<StoreSDNode>(N); 4206 SDValue AddrOp; 4207 if (LDN) 4208 AddrOp = LDN->getOperand(1); 4209 else if (STN) 4210 AddrOp = STN->getOperand(2); 4211 4212 // If the address points a frame object or a frame object with an offset, 4213 // we need to check the object alignment. 4214 short Imm = 0; 4215 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>( 4216 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) : 4217 AddrOp)) { 4218 // If op0 is a frame index that is under aligned, we can't do it either, 4219 // because it is translated to r31 or r1 + slot + offset. We won't know the 4220 // slot number until the stack frame is finalized. 4221 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo(); 4222 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value(); 4223 if ((SlotAlign % Val) != 0) 4224 return false; 4225 4226 // If we have an offset, we need further check on the offset. 4227 if (AddrOp.getOpcode() != ISD::ADD) 4228 return true; 4229 } 4230 4231 if (AddrOp.getOpcode() == ISD::ADD) 4232 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val); 4233 4234 // If the address comes from the outside, the offset will be zero. 4235 return AddrOp.getOpcode() == ISD::CopyFromReg; 4236 } 4237 4238 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 4239 // Transfer memoperands. 
4240 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 4241 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 4242 } 4243 4244 static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, 4245 bool &NeedSwapOps, bool &IsUnCmp) { 4246 4247 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here."); 4248 4249 SDValue LHS = N->getOperand(0); 4250 SDValue RHS = N->getOperand(1); 4251 SDValue TrueRes = N->getOperand(2); 4252 SDValue FalseRes = N->getOperand(3); 4253 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes); 4254 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 && 4255 N->getSimpleValueType(0) != MVT::i32)) 4256 return false; 4257 4258 // We are looking for any of: 4259 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1) 4260 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1) 4261 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq) 4262 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq) 4263 int64_t TrueResVal = TrueConst->getSExtValue(); 4264 if ((TrueResVal < -1 || TrueResVal > 1) || 4265 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) || 4266 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) || 4267 (TrueResVal == 0 && 4268 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ))) 4269 return false; 4270 4271 bool InnerIsSel = FalseRes.getOpcode() == ISD::SELECT_CC; 4272 SDValue SetOrSelCC = InnerIsSel ? FalseRes : FalseRes.getOperand(0); 4273 if (SetOrSelCC.getOpcode() != ISD::SETCC && 4274 SetOrSelCC.getOpcode() != ISD::SELECT_CC) 4275 return false; 4276 4277 // Without this setb optimization, the outer SELECT_CC will be manually 4278 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass 4279 // transforms pseudo instruction to isel instruction. When there are more than 4280 // one use for result like zext/sext, with current optimization we only see 4281 // isel is replaced by setb but can't see any significant gain. Since 4282 // setb has longer latency than original isel, we should avoid this. Another 4283 // point is that setb requires comparison always kept, it can break the 4284 // opportunity to get the comparison away if we have in future. 4285 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse())) 4286 return false; 4287 4288 SDValue InnerLHS = SetOrSelCC.getOperand(0); 4289 SDValue InnerRHS = SetOrSelCC.getOperand(1); 4290 ISD::CondCode InnerCC = 4291 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get(); 4292 // If the inner comparison is a select_cc, make sure the true/false values are 4293 // 1/-1 and canonicalize it if needed. 4294 if (InnerIsSel) { 4295 ConstantSDNode *SelCCTrueConst = 4296 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2)); 4297 ConstantSDNode *SelCCFalseConst = 4298 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3)); 4299 if (!SelCCTrueConst || !SelCCFalseConst) 4300 return false; 4301 int64_t SelCCTVal = SelCCTrueConst->getSExtValue(); 4302 int64_t SelCCFVal = SelCCFalseConst->getSExtValue(); 4303 // The values must be -1/1 (requiring a swap) or 1/-1. 4304 if (SelCCTVal == -1 && SelCCFVal == 1) { 4305 std::swap(InnerLHS, InnerRHS); 4306 } else if (SelCCTVal != 1 || SelCCFVal != -1) 4307 return false; 4308 } 4309 4310 // Canonicalize unsigned case 4311 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) { 4312 IsUnCmp = true; 4313 InnerCC = (InnerCC == ISD::SETULT) ? 
ISD::SETLT : ISD::SETGT; 4314 } 4315 4316 bool InnerSwapped = false; 4317 if (LHS == InnerRHS && RHS == InnerLHS) 4318 InnerSwapped = true; 4319 else if (LHS != InnerLHS || RHS != InnerRHS) 4320 return false; 4321 4322 switch (CC) { 4323 // (select_cc lhs, rhs, 0, \ 4324 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq) 4325 case ISD::SETEQ: 4326 if (!InnerIsSel) 4327 return false; 4328 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT) 4329 return false; 4330 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped; 4331 break; 4332 4333 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt) 4334 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt) 4335 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt) 4336 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt) 4337 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt) 4338 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt) 4339 case ISD::SETULT: 4340 if (!IsUnCmp && InnerCC != ISD::SETNE) 4341 return false; 4342 IsUnCmp = true; 4343 LLVM_FALLTHROUGH; 4344 case ISD::SETLT: 4345 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) || 4346 (InnerCC == ISD::SETLT && InnerSwapped)) 4347 NeedSwapOps = (TrueResVal == 1); 4348 else 4349 return false; 4350 break; 4351 4352 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt) 4353 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt) 4354 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt) 4355 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt) 4356 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt) 4357 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt) 4358 case ISD::SETUGT: 4359 if (!IsUnCmp && InnerCC != ISD::SETNE) 4360 return false; 4361 IsUnCmp = true; 4362 LLVM_FALLTHROUGH; 4363 case ISD::SETGT: 4364 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) || 4365 (InnerCC == ISD::SETGT && InnerSwapped)) 4366 NeedSwapOps = (TrueResVal == -1); 4367 else 4368 return false; 4369 break; 4370 4371 default: 4372 return false; 4373 } 4374 4375 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: "); 4376 LLVM_DEBUG(N->dump()); 4377 4378 return true; 4379 } 4380 4381 bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) { 4382 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4383 unsigned Imm; 4384 if (!isInt32Immediate(N->getOperand(1), Imm)) 4385 return false; 4386 4387 SDLoc dl(N); 4388 SDValue Val = N->getOperand(0); 4389 unsigned SH, MB, ME; 4390 // If this is an and of a value rotated between 0 and 31 bits and then and'd 4391 // with a mask, emit rlwinm 4392 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) { 4393 Val = Val.getOperand(0); 4394 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl), 4395 getI32Imm(ME, dl)}; 4396 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4397 return true; 4398 } 4399 4400 // If this is just a masked value where the input is not handled, and 4401 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm 4402 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) { 4403 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl), 4404 getI32Imm(ME, dl)}; 4405 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops); 4406 return true; 4407 } 4408 4409 // AND X, 0 -> 0, not "rlwinm 32". 
4410 if (Imm == 0) { 4411 ReplaceUses(SDValue(N, 0), N->getOperand(1)); 4412 return true; 4413 } 4414 4415 return false; 4416 } 4417 4418 bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { 4419 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4420 uint64_t Imm64; 4421 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) 4422 return false; 4423 4424 unsigned MB, ME; 4425 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) { 4426 // MB ME 4427 // +----------------------+ 4428 // |xxxxxxxxxxx00011111000| 4429 // +----------------------+ 4430 // 0 32 64 4431 // We can only do it if the MB is larger than 32 and MB <= ME 4432 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even 4433 // we didn't rotate it. 4434 SDLoc dl(N); 4435 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl), 4436 getI64Imm(ME - 32, dl)}; 4437 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops); 4438 return true; 4439 } 4440 4441 return false; 4442 } 4443 4444 bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { 4445 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4446 uint64_t Imm64; 4447 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) 4448 return false; 4449 4450 // Do nothing if it is 16-bit imm as the pattern in the .td file handle 4451 // it well with "andi.". 4452 if (isUInt<16>(Imm64)) 4453 return false; 4454 4455 SDLoc Loc(N); 4456 SDValue Val = N->getOperand(0); 4457 4458 // Optimized with two rldicl's as follows: 4459 // Add missing bits on left to the mask and check that the mask is a 4460 // wrapped run of ones, i.e. 4461 // Change pattern |0001111100000011111111| 4462 // to |1111111100000011111111|. 4463 unsigned NumOfLeadingZeros = countLeadingZeros(Imm64); 4464 if (NumOfLeadingZeros != 0) 4465 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros); 4466 4467 unsigned MB, ME; 4468 if (!isRunOfOnes64(Imm64, MB, ME)) 4469 return false; 4470 4471 // ME MB MB-ME+63 4472 // +----------------------+ +----------------------+ 4473 // |1111111100000011111111| -> |0000001111111111111111| 4474 // +----------------------+ +----------------------+ 4475 // 0 63 0 63 4476 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between. 4477 unsigned OnesOnLeft = ME + 1; 4478 unsigned ZerosInBetween = (MB - ME + 63) & 63; 4479 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear 4480 // on the left the bits that are already zeros in the mask. 4481 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, 4482 getI64Imm(OnesOnLeft, Loc), 4483 getI64Imm(ZerosInBetween, Loc)), 4484 0); 4485 // MB-ME+63 ME MB 4486 // +----------------------+ +----------------------+ 4487 // |0000001111111111111111| -> |0001111100000011111111| 4488 // +----------------------+ +----------------------+ 4489 // 0 63 0 63 4490 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the 4491 // left the number of ones we previously added. 4492 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc), 4493 getI64Imm(NumOfLeadingZeros, Loc)}; 4494 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); 4495 return true; 4496 } 4497 4498 bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { 4499 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4500 unsigned Imm; 4501 if (!isInt32Immediate(N->getOperand(1), Imm)) 4502 return false; 4503 4504 SDValue Val = N->getOperand(0); 4505 unsigned Imm2; 4506 // ISD::OR doesn't get all the bitfield insertion fun. 
4507 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a 4508 // bitfield insert. 4509 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2)) 4510 return false; 4511 4512 // The idea here is to check whether this is equivalent to: 4513 // (c1 & m) | (x & ~m) 4514 // where m is a run-of-ones mask. The logic here is that, for each bit in 4515 // c1 and c2: 4516 // - if both are 1, then the output will be 1. 4517 // - if both are 0, then the output will be 0. 4518 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will 4519 // come from x. 4520 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will 4521 // be 0. 4522 // If that last condition is never the case, then we can form m from the 4523 // bits that are the same between c1 and c2. 4524 unsigned MB, ME; 4525 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) { 4526 SDLoc dl(N); 4527 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl), 4528 getI32Imm(MB, dl), getI32Imm(ME, dl)}; 4529 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops)); 4530 return true; 4531 } 4532 4533 return false; 4534 } 4535 4536 bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) { 4537 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4538 uint64_t Imm64; 4539 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64)) 4540 return false; 4541 4542 // If this is a 64-bit zero-extension mask, emit rldicl. 4543 unsigned MB = 64 - countTrailingOnes(Imm64); 4544 unsigned SH = 0; 4545 unsigned Imm; 4546 SDValue Val = N->getOperand(0); 4547 SDLoc dl(N); 4548 4549 if (Val.getOpcode() == ISD::ANY_EXTEND) { 4550 auto Op0 = Val.getOperand(0); 4551 if (Op0.getOpcode() == ISD::SRL && 4552 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) { 4553 4554 auto ResultType = Val.getNode()->getValueType(0); 4555 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType); 4556 SDValue IDVal(ImDef, 0); 4557 4558 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType, 4559 IDVal, Op0.getOperand(0), 4560 getI32Imm(1, dl)), 4561 0); 4562 SH = 64 - Imm; 4563 } 4564 } 4565 4566 // If the operand is a logical right shift, we can fold it into this 4567 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb) 4568 // for n <= mb. The right shift is really a left rotate followed by a 4569 // mask, and this mask is a more-restrictive sub-mask of the mask implied 4570 // by the shift. 4571 if (Val.getOpcode() == ISD::SRL && 4572 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) { 4573 assert(Imm < 64 && "Illegal shift amount"); 4574 Val = Val.getOperand(0); 4575 SH = 64 - Imm; 4576 } 4577 4578 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)}; 4579 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); 4580 return true; 4581 } 4582 4583 bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) { 4584 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); 4585 uint64_t Imm64; 4586 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || 4587 !isMask_64(~Imm64)) 4588 return false; 4589 4590 // If this is a negated 64-bit zero-extension mask, 4591 // i.e. the immediate is a sequence of ones from most significant side 4592 // and all zero for reminder, we should use rldicr. 
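  // Illustrative example (values chosen only for exposition): for
  // Imm64 = 0xFFFFFFFF00000000, ~Imm64 has 32 trailing ones, so MB becomes
  // 63 - 32 = 31 and the rldicr below keeps just the 32 most significant bits.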
4593 unsigned MB = 63 - countTrailingOnes(~Imm64); 4594 unsigned SH = 0; 4595 SDLoc dl(N); 4596 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)}; 4597 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops); 4598 return true; 4599 } 4600 4601 bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) { 4602 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected"); 4603 uint64_t Imm64; 4604 unsigned MB, ME; 4605 SDValue N0 = N->getOperand(0); 4606 4607 // We won't get fewer instructions if the imm is 32-bit integer. 4608 // rldimi requires the imm to have consecutive ones with both sides zero. 4609 // Also, make sure the first Op has only one use, otherwise this may increase 4610 // register pressure since rldimi is destructive. 4611 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || 4612 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse()) 4613 return false; 4614 4615 unsigned SH = 63 - ME; 4616 SDLoc Dl(N); 4617 // Use select64Imm for making LI instr instead of directly putting Imm64 4618 SDValue Ops[] = { 4619 N->getOperand(0), 4620 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0), 4621 getI32Imm(SH, Dl), getI32Imm(MB, Dl)}; 4622 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops); 4623 return true; 4624 } 4625 4626 // Select - Convert the specified operand from a target-independent to a 4627 // target-specific node if it hasn't already been changed. 4628 void PPCDAGToDAGISel::Select(SDNode *N) { 4629 SDLoc dl(N); 4630 if (N->isMachineOpcode()) { 4631 N->setNodeId(-1); 4632 return; // Already selected. 4633 } 4634 4635 // In case any misguided DAG-level optimizations form an ADD with a 4636 // TargetConstant operand, crash here instead of miscompiling (by selecting 4637 // an r+r add instead of some kind of r+i add). 4638 if (N->getOpcode() == ISD::ADD && 4639 N->getOperand(1).getOpcode() == ISD::TargetConstant) 4640 llvm_unreachable("Invalid ADD with TargetConstant operand"); 4641 4642 // Try matching complex bit permutations before doing anything else. 4643 if (tryBitPermutation(N)) 4644 return; 4645 4646 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR). 4647 if (tryIntCompareInGPR(N)) 4648 return; 4649 4650 switch (N->getOpcode()) { 4651 default: break; 4652 4653 case ISD::Constant: 4654 if (N->getValueType(0) == MVT::i64) { 4655 ReplaceNode(N, selectI64Imm(CurDAG, N)); 4656 return; 4657 } 4658 break; 4659 4660 case ISD::SETCC: 4661 if (trySETCC(N)) 4662 return; 4663 break; 4664 // These nodes will be transformed into GETtlsADDR32 node, which 4665 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT 4666 case PPCISD::ADDI_TLSLD_L_ADDR: 4667 case PPCISD::ADDI_TLSGD_L_ADDR: { 4668 const Module *Mod = MF->getFunction().getParent(); 4669 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || 4670 !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() || 4671 Mod->getPICLevel() == PICLevel::SmallPIC) 4672 break; 4673 // Attach global base pointer on GETtlsADDR32 node in order to 4674 // generate secure plt code for TLS symbols. 
4675 getGlobalBaseReg(); 4676 } break; 4677 case PPCISD::CALL: { 4678 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || 4679 !TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt() || 4680 !PPCSubTarget->isTargetELF()) 4681 break; 4682 4683 SDValue Op = N->getOperand(1); 4684 4685 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) { 4686 if (GA->getTargetFlags() == PPCII::MO_PLT) 4687 getGlobalBaseReg(); 4688 } 4689 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { 4690 if (ES->getTargetFlags() == PPCII::MO_PLT) 4691 getGlobalBaseReg(); 4692 } 4693 } 4694 break; 4695 4696 case PPCISD::GlobalBaseReg: 4697 ReplaceNode(N, getGlobalBaseReg()); 4698 return; 4699 4700 case ISD::FrameIndex: 4701 selectFrameIndex(N, N); 4702 return; 4703 4704 case PPCISD::MFOCRF: { 4705 SDValue InFlag = N->getOperand(1); 4706 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, 4707 N->getOperand(0), InFlag)); 4708 return; 4709 } 4710 4711 case PPCISD::READ_TIME_BASE: 4712 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32, 4713 MVT::Other, N->getOperand(0))); 4714 return; 4715 4716 case PPCISD::SRA_ADDZE: { 4717 SDValue N0 = N->getOperand(0); 4718 SDValue ShiftAmt = 4719 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))-> 4720 getConstantIntValue(), dl, 4721 N->getValueType(0)); 4722 if (N->getValueType(0) == MVT::i64) { 4723 SDNode *Op = 4724 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue, 4725 N0, ShiftAmt); 4726 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0), 4727 SDValue(Op, 1)); 4728 return; 4729 } else { 4730 assert(N->getValueType(0) == MVT::i32 && 4731 "Expecting i64 or i32 in PPCISD::SRA_ADDZE"); 4732 SDNode *Op = 4733 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue, 4734 N0, ShiftAmt); 4735 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0), 4736 SDValue(Op, 1)); 4737 return; 4738 } 4739 } 4740 4741 case ISD::STORE: { 4742 // Change TLS initial-exec D-form stores to X-form stores. 4743 StoreSDNode *ST = cast<StoreSDNode>(N); 4744 if (EnableTLSOpt && PPCSubTarget->isELFv2ABI() && 4745 ST->getAddressingMode() != ISD::PRE_INC) 4746 if (tryTLSXFormStore(ST)) 4747 return; 4748 break; 4749 } 4750 case ISD::LOAD: { 4751 // Handle preincrement loads. 4752 LoadSDNode *LD = cast<LoadSDNode>(N); 4753 EVT LoadedVT = LD->getMemoryVT(); 4754 4755 // Normal loads are handled by code generated from the .td file. 4756 if (LD->getAddressingMode() != ISD::PRE_INC) { 4757 // Change TLS initial-exec D-form loads to X-form loads. 4758 if (EnableTLSOpt && PPCSubTarget->isELFv2ABI()) 4759 if (tryTLSXFormLoad(LD)) 4760 return; 4761 break; 4762 } 4763 4764 SDValue Offset = LD->getOffset(); 4765 if (Offset.getOpcode() == ISD::TargetConstant || 4766 Offset.getOpcode() == ISD::TargetGlobalAddress) { 4767 4768 unsigned Opcode; 4769 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 4770 if (LD->getValueType(0) != MVT::i64) { 4771 // Handle PPC32 integer and normal FP loads. 4772 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 4773 switch (LoadedVT.getSimpleVT().SimpleTy) { 4774 default: llvm_unreachable("Invalid PPC load type!"); 4775 case MVT::f64: Opcode = PPC::LFDU; break; 4776 case MVT::f32: Opcode = PPC::LFSU; break; 4777 case MVT::i32: Opcode = PPC::LWZU; break; 4778 case MVT::i16: Opcode = isSExt ? 
PPC::LHAU : PPC::LHZU; break; 4779 case MVT::i1: 4780 case MVT::i8: Opcode = PPC::LBZU; break; 4781 } 4782 } else { 4783 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 4784 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 4785 switch (LoadedVT.getSimpleVT().SimpleTy) { 4786 default: llvm_unreachable("Invalid PPC load type!"); 4787 case MVT::i64: Opcode = PPC::LDU; break; 4788 case MVT::i32: Opcode = PPC::LWZU8; break; 4789 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break; 4790 case MVT::i1: 4791 case MVT::i8: Opcode = PPC::LBZU8; break; 4792 } 4793 } 4794 4795 SDValue Chain = LD->getChain(); 4796 SDValue Base = LD->getBasePtr(); 4797 SDValue Ops[] = { Offset, Base, Chain }; 4798 SDNode *MN = CurDAG->getMachineNode( 4799 Opcode, dl, LD->getValueType(0), 4800 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 4801 transferMemOperands(N, MN); 4802 ReplaceNode(N, MN); 4803 return; 4804 } else { 4805 unsigned Opcode; 4806 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD; 4807 if (LD->getValueType(0) != MVT::i64) { 4808 // Handle PPC32 integer and normal FP loads. 4809 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load"); 4810 switch (LoadedVT.getSimpleVT().SimpleTy) { 4811 default: llvm_unreachable("Invalid PPC load type!"); 4812 case MVT::v4f64: Opcode = PPC::QVLFDUX; break; // QPX 4813 case MVT::v4f32: Opcode = PPC::QVLFSUX; break; // QPX 4814 case MVT::f64: Opcode = PPC::LFDUX; break; 4815 case MVT::f32: Opcode = PPC::LFSUX; break; 4816 case MVT::i32: Opcode = PPC::LWZUX; break; 4817 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break; 4818 case MVT::i1: 4819 case MVT::i8: Opcode = PPC::LBZUX; break; 4820 } 4821 } else { 4822 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!"); 4823 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) && 4824 "Invalid sext update load"); 4825 switch (LoadedVT.getSimpleVT().SimpleTy) { 4826 default: llvm_unreachable("Invalid PPC load type!"); 4827 case MVT::i64: Opcode = PPC::LDUX; break; 4828 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break; 4829 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break; 4830 case MVT::i1: 4831 case MVT::i8: Opcode = PPC::LBZUX8; break; 4832 } 4833 } 4834 4835 SDValue Chain = LD->getChain(); 4836 SDValue Base = LD->getBasePtr(); 4837 SDValue Ops[] = { Base, Offset, Chain }; 4838 SDNode *MN = CurDAG->getMachineNode( 4839 Opcode, dl, LD->getValueType(0), 4840 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops); 4841 transferMemOperands(N, MN); 4842 ReplaceNode(N, MN); 4843 return; 4844 } 4845 } 4846 4847 case ISD::AND: 4848 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr 4849 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || 4850 tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) 4851 return; 4852 4853 // Other cases are autogenerated. 4854 break; 4855 case ISD::OR: { 4856 if (N->getValueType(0) == MVT::i32) 4857 if (tryBitfieldInsert(N)) 4858 return; 4859 4860 int16_t Imm; 4861 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex && 4862 isIntS16Immediate(N->getOperand(1), Imm)) { 4863 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0)); 4864 4865 // If this is equivalent to an add, then we can fold it with the 4866 // FrameIndex calculation. 
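// The test below requires every bit set in Imm to be known zero in the
// frame-index operand; with no overlapping bits there are no carries, so
// the OR computes the same value as an ADD. For example (illustrative
// values only), if the frame object is known to be 16-byte aligned, the
// low four address bits are zero and (or fi, 8) equals (add fi, 8).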
4867 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
4868 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4869 return;
4870 }
4871 }
4872
4873 // If this is an 'or' against an immediate whose set bits are a run of
4874 // consecutive ones with zeros on both sides, try to emit rldimi.
4875 if (tryAsSingleRLDIMI(N))
4876 return;
4877
4878 // OR with a 32-bit immediate can be handled by ori + oris
4879 // without creating an immediate in a GPR.
4880 uint64_t Imm64 = 0;
4881 bool IsPPC64 = PPCSubTarget->isPPC64();
4882 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4883 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4884 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
4885 uint64_t ImmHi = Imm64 >> 16;
4886 uint64_t ImmLo = Imm64 & 0xFFFF;
4887 if (ImmHi != 0 && ImmLo != 0) {
4888 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
4889 N->getOperand(0),
4890 getI16Imm(ImmLo, dl));
4891 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4892 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
4893 return;
4894 }
4895 }
4896
4897 // Other cases are autogenerated.
4898 break;
4899 }
4900 case ISD::XOR: {
4901 // XOR with a 32-bit immediate can be handled by xori + xoris
4902 // without creating an immediate in a GPR.
4903 uint64_t Imm64 = 0;
4904 bool IsPPC64 = PPCSubTarget->isPPC64();
4905 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
4906 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
4907 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
4908 uint64_t ImmHi = Imm64 >> 16;
4909 uint64_t ImmLo = Imm64 & 0xFFFF;
4910 if (ImmHi != 0 && ImmLo != 0) {
4911 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
4912 N->getOperand(0),
4913 getI16Imm(ImmLo, dl));
4914 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
4915 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
4916 return;
4917 }
4918 }
4919
4920 break;
4921 }
4922 case ISD::ADD: {
4923 int16_t Imm;
4924 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
4925 isIntS16Immediate(N->getOperand(1), Imm)) {
4926 selectFrameIndex(N, N->getOperand(0).getNode(), (int)Imm);
4927 return;
4928 }
4929
4930 break;
4931 }
4932 case ISD::SHL: {
4933 unsigned Imm, SH, MB, ME;
4934 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4935 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4936 SDValue Ops[] = { N->getOperand(0).getOperand(0),
4937 getI32Imm(SH, dl), getI32Imm(MB, dl),
4938 getI32Imm(ME, dl) };
4939 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4940 return;
4941 }
4942
4943 // Other cases are autogenerated.
4944 break;
4945 }
4946 case ISD::SRL: {
4947 unsigned Imm, SH, MB, ME;
4948 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
4949 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
4950 SDValue Ops[] = { N->getOperand(0).getOperand(0),
4951 getI32Imm(SH, dl), getI32Imm(MB, dl),
4952 getI32Imm(ME, dl) };
4953 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4954 return;
4955 }
4956
4957 // Other cases are autogenerated.
4958 break;
4959 }
4960 // FIXME: Remove this once the ANDI glue bug is fixed:
4961 case PPCISD::ANDI_rec_1_EQ_BIT:
4962 case PPCISD::ANDI_rec_1_GT_BIT: {
4963 if (!ANDIGlueBug)
4964 break;
4965
4966 EVT InVT = N->getOperand(0).getValueType();
4967 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
4968 "Invalid input type for ANDI_rec_1_EQ_BIT");
4969
4970 unsigned Opcode = (InVT == MVT::i64) ?
PPC::ANDI8_rec : PPC::ANDI_rec;
4971 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
4972 N->getOperand(0),
4973 CurDAG->getTargetConstant(1, dl, InVT)),
4974 0);
4975 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
4976 SDValue SRIdxVal = CurDAG->getTargetConstant(
4977 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
4978 dl, MVT::i32);
4979
4980 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
4981 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
4982 return;
4983 }
4984 case ISD::SELECT_CC: {
4985 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
4986 EVT PtrVT =
4987 CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
4988 bool isPPC64 = (PtrVT == MVT::i64);
4989
4990 // If this is a select of i1 operands, we'll pattern match it.
4991 if (PPCSubTarget->useCRBits() &&
4992 N->getOperand(0).getValueType() == MVT::i1)
4993 break;
4994
4995 if (PPCSubTarget->isISA3_0() && PPCSubTarget->isPPC64()) {
4996 bool NeedSwapOps = false;
4997 bool IsUnCmp = false;
4998 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
4999 SDValue LHS = N->getOperand(0);
5000 SDValue RHS = N->getOperand(1);
5001 if (NeedSwapOps)
5002 std::swap(LHS, RHS);
5003
5004 // Use SelectCC to generate the comparison that sets the CR bits. For
5005 // an equality comparison with one literal operand, SelectCC may avoid
5006 // materializing the whole literal and instead use xoris to check it
5007 // first; the resulting CR bits then cannot exactly represent the GT/LT
5008 // relationship. To avoid that, we specify SETGT/SETUGT here instead of
5009 // SETEQ.
5010 SDValue GenCC =
5011 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5012 CurDAG->SelectNodeTo(
5013 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5014 N->getValueType(0), GenCC);
5015 NumP9Setb++;
5016 return;
5017 }
5018 }
5019
5020 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5021 if (!isPPC64)
5022 if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
5023 if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
5024 if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
5025 if (N1C->isNullValue() && N3C->isNullValue() &&
5026 N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
5027 // FIXME: Implement this optzn for PPC64.
5028 N->getValueType(0) == MVT::i32) {
5029 SDNode *Tmp =
5030 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5031 N->getOperand(0), getI32Imm(~0U, dl));
5032 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5033 N->getOperand(0), SDValue(Tmp, 1));
5034 return;
5035 }
5036
5037 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5038
5039 if (N->getValueType(0) == MVT::i1) {
5040 // An i1 select is: (c & t) | (!c & f).
5041 bool Inv;
5042 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5043
5044 unsigned SRI;
5045 switch (Idx) {
5046 default: llvm_unreachable("Invalid CC index");
5047 case 0: SRI = PPC::sub_lt; break;
5048 case 1: SRI = PPC::sub_gt; break;
5049 case 2: SRI = PPC::sub_eq; break;
5050 case 3: SRI = PPC::sub_un; break;
5051 }
5052
5053 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5054
5055 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5056 CCBit, CCBit), 0);
5057 SDValue C = Inv ? NotCCBit : CCBit,
5058 NotC = Inv ?
CCBit : NotCCBit; 5059 5060 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 5061 C, N->getOperand(2)), 0); 5062 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1, 5063 NotC, N->getOperand(3)), 0); 5064 5065 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF); 5066 return; 5067 } 5068 5069 unsigned BROpc = 5070 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), PPCSubTarget); 5071 5072 unsigned SelectCCOp; 5073 if (N->getValueType(0) == MVT::i32) 5074 SelectCCOp = PPC::SELECT_CC_I4; 5075 else if (N->getValueType(0) == MVT::i64) 5076 SelectCCOp = PPC::SELECT_CC_I8; 5077 else if (N->getValueType(0) == MVT::f32) { 5078 if (PPCSubTarget->hasP8Vector()) 5079 SelectCCOp = PPC::SELECT_CC_VSSRC; 5080 else if (PPCSubTarget->hasSPE()) 5081 SelectCCOp = PPC::SELECT_CC_SPE4; 5082 else 5083 SelectCCOp = PPC::SELECT_CC_F4; 5084 } else if (N->getValueType(0) == MVT::f64) { 5085 if (PPCSubTarget->hasVSX()) 5086 SelectCCOp = PPC::SELECT_CC_VSFRC; 5087 else if (PPCSubTarget->hasSPE()) 5088 SelectCCOp = PPC::SELECT_CC_SPE; 5089 else 5090 SelectCCOp = PPC::SELECT_CC_F8; 5091 } else if (N->getValueType(0) == MVT::f128) 5092 SelectCCOp = PPC::SELECT_CC_F16; 5093 else if (PPCSubTarget->hasSPE()) 5094 SelectCCOp = PPC::SELECT_CC_SPE; 5095 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f64) 5096 SelectCCOp = PPC::SELECT_CC_QFRC; 5097 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4f32) 5098 SelectCCOp = PPC::SELECT_CC_QSRC; 5099 else if (PPCSubTarget->hasQPX() && N->getValueType(0) == MVT::v4i1) 5100 SelectCCOp = PPC::SELECT_CC_QBRC; 5101 else if (N->getValueType(0) == MVT::v2f64 || 5102 N->getValueType(0) == MVT::v2i64) 5103 SelectCCOp = PPC::SELECT_CC_VSRC; 5104 else 5105 SelectCCOp = PPC::SELECT_CC_VRRC; 5106 5107 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3), 5108 getI32Imm(BROpc, dl) }; 5109 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops); 5110 return; 5111 } 5112 case ISD::VECTOR_SHUFFLE: 5113 if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 || 5114 N->getValueType(0) == MVT::v2i64)) { 5115 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 5116 5117 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1), 5118 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1); 5119 unsigned DM[2]; 5120 5121 for (int i = 0; i < 2; ++i) 5122 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2) 5123 DM[i] = 0; 5124 else 5125 DM[i] = 1; 5126 5127 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 && 5128 Op1.getOpcode() == ISD::SCALAR_TO_VECTOR && 5129 isa<LoadSDNode>(Op1.getOperand(0))) { 5130 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0)); 5131 SDValue Base, Offset; 5132 5133 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() && 5134 (LD->getMemoryVT() == MVT::f64 || 5135 LD->getMemoryVT() == MVT::i64) && 5136 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { 5137 SDValue Chain = LD->getChain(); 5138 SDValue Ops[] = { Base, Offset, Chain }; 5139 MachineMemOperand *MemOp = LD->getMemOperand(); 5140 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, 5141 N->getValueType(0), Ops); 5142 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp}); 5143 return; 5144 } 5145 } 5146 5147 // For little endian, we must swap the input operands and adjust 5148 // the mask elements (reverse and invert them). 
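// Concretely, the code below exchanges the two inputs and replaces each
// mask element with one minus the other element. For example (illustrative
// values only), DM = {0, 0} becomes {1, 1} while DM = {0, 1} stays {0, 1},
// which is the "reverse and invert" described above.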
5149 if (PPCSubTarget->isLittleEndian()) { 5150 std::swap(Op1, Op2); 5151 unsigned tmp = DM[0]; 5152 DM[0] = 1 - DM[1]; 5153 DM[1] = 1 - tmp; 5154 } 5155 5156 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl, 5157 MVT::i32); 5158 SDValue Ops[] = { Op1, Op2, DMV }; 5159 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops); 5160 return; 5161 } 5162 5163 break; 5164 case PPCISD::BDNZ: 5165 case PPCISD::BDZ: { 5166 bool IsPPC64 = PPCSubTarget->isPPC64(); 5167 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) }; 5168 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ 5169 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) 5170 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ), 5171 MVT::Other, Ops); 5172 return; 5173 } 5174 case PPCISD::COND_BRANCH: { 5175 // Op #0 is the Chain. 5176 // Op #1 is the PPC::PRED_* number. 5177 // Op #2 is the CR# 5178 // Op #3 is the Dest MBB 5179 // Op #4 is the Flag. 5180 // Prevent PPC::PRED_* from being selected into LI. 5181 unsigned PCC = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 5182 if (EnableBranchHint) 5183 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3)); 5184 5185 SDValue Pred = getI32Imm(PCC, dl); 5186 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3), 5187 N->getOperand(0), N->getOperand(4) }; 5188 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 5189 return; 5190 } 5191 case ISD::BR_CC: { 5192 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 5193 unsigned PCC = 5194 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), PPCSubTarget); 5195 5196 if (N->getOperand(2).getValueType() == MVT::i1) { 5197 unsigned Opc; 5198 bool Swap; 5199 switch (PCC) { 5200 default: llvm_unreachable("Unexpected Boolean-operand predicate"); 5201 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break; 5202 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break; 5203 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break; 5204 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break; 5205 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break; 5206 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break; 5207 } 5208 5209 // A signed comparison of i1 values produces the opposite result to an 5210 // unsigned one if the condition code includes less-than or greater-than. 5211 // This is because 1 is the most negative signed i1 number and the most 5212 // positive unsigned i1 number. The CR-logical operations used for such 5213 // comparisons are non-commutative so for signed comparisons vs. unsigned 5214 // ones, the input operands just need to be swapped. 5215 if (ISD::isSignedIntSetCC(CC)) 5216 Swap = !Swap; 5217 5218 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1, 5219 N->getOperand(Swap ? 3 : 2), 5220 N->getOperand(Swap ? 2 : 3)), 0); 5221 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4), 5222 N->getOperand(0)); 5223 return; 5224 } 5225 5226 if (EnableBranchHint) 5227 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4)); 5228 5229 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl); 5230 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode, 5231 N->getOperand(4), N->getOperand(0) }; 5232 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops); 5233 return; 5234 } 5235 case ISD::BRIND: { 5236 // FIXME: Should custom lower this. 5237 SDValue Chain = N->getOperand(0); 5238 SDValue Target = N->getOperand(1); 5239 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8; 5240 unsigned Reg = Target.getValueType() == MVT::i32 ? 
PPC::BCTR : PPC::BCTR8; 5241 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target, 5242 Chain), 0); 5243 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain); 5244 return; 5245 } 5246 case PPCISD::TOC_ENTRY: { 5247 const bool isPPC64 = PPCSubTarget->isPPC64(); 5248 const bool isELFABI = PPCSubTarget->isSVR4ABI(); 5249 const bool isAIXABI = PPCSubTarget->isAIXABI(); 5250 5251 // PowerPC only support small, medium and large code model. 5252 const CodeModel::Model CModel = TM.getCodeModel(); 5253 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) && 5254 "PowerPC doesn't support tiny or kernel code models."); 5255 5256 if (isAIXABI && CModel == CodeModel::Medium) 5257 report_fatal_error("Medium code model is not supported on AIX."); 5258 5259 // For 64-bit small code model, we allow SelectCodeCommon to handle this, 5260 // selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. 5261 if (isPPC64 && CModel == CodeModel::Small) 5262 break; 5263 5264 // Handle 32-bit small code model. 5265 if (!isPPC64) { 5266 // Transforms the ISD::TOC_ENTRY node to a PPCISD::LWZtoc. 5267 auto replaceWithLWZtoc = [this, &dl](SDNode *TocEntry) { 5268 SDValue GA = TocEntry->getOperand(0); 5269 SDValue TocBase = TocEntry->getOperand(1); 5270 SDNode *MN = CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA, 5271 TocBase); 5272 transferMemOperands(TocEntry, MN); 5273 ReplaceNode(TocEntry, MN); 5274 }; 5275 5276 if (isELFABI) { 5277 assert(TM.isPositionIndependent() && 5278 "32-bit ELF can only have TOC entries in position independent" 5279 " code."); 5280 // 32-bit ELF always uses a small code model toc access. 5281 replaceWithLWZtoc(N); 5282 return; 5283 } 5284 5285 if (isAIXABI && CModel == CodeModel::Small) { 5286 replaceWithLWZtoc(N); 5287 return; 5288 } 5289 } 5290 5291 assert(CModel != CodeModel::Small && "All small code models handled."); 5292 5293 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit" 5294 " ELF/AIX or 32-bit AIX in the following."); 5295 5296 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode 5297 // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We 5298 // generate two instructions as described below. The first source operand 5299 // is a symbol reference. If it must be toc-referenced according to 5300 // PPCSubTarget, we generate: 5301 // [32-bit AIX] 5302 // LWZtocL(@sym, ADDIStocHA(%r2, @sym)) 5303 // [64-bit ELF/AIX] 5304 // LDtocL(@sym, ADDIStocHA8(%x2, @sym)) 5305 // Otherwise we generate: 5306 // ADDItocL(ADDIStocHA8(%x2, @sym), @sym) 5307 SDValue GA = N->getOperand(0); 5308 SDValue TOCbase = N->getOperand(1); 5309 5310 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 5311 SDNode *Tmp = CurDAG->getMachineNode( 5312 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA); 5313 5314 if (PPCLowering->isAccessedAsGotIndirect(GA)) { 5315 // If it is accessed as got-indirect, we need an extra LWZ/LD to load 5316 // the address. 5317 SDNode *MN = CurDAG->getMachineNode( 5318 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0)); 5319 5320 transferMemOperands(N, MN); 5321 ReplaceNode(N, MN); 5322 return; 5323 } 5324 5325 // Build the address relative to the TOC-pointer. 5326 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64, 5327 SDValue(Tmp, 0), GA)); 5328 return; 5329 } 5330 case PPCISD::PPC32_PICGOT: 5331 // Generate a PIC-safe GOT reference. 
5332 assert(PPCSubTarget->is32BitELFABI() && 5333 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4"); 5334 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, 5335 PPCLowering->getPointerTy(CurDAG->getDataLayout()), 5336 MVT::i32); 5337 return; 5338 5339 case PPCISD::VADD_SPLAT: { 5340 // This expands into one of three sequences, depending on whether 5341 // the first operand is odd or even, positive or negative. 5342 assert(isa<ConstantSDNode>(N->getOperand(0)) && 5343 isa<ConstantSDNode>(N->getOperand(1)) && 5344 "Invalid operand on VADD_SPLAT!"); 5345 5346 int Elt = N->getConstantOperandVal(0); 5347 int EltSize = N->getConstantOperandVal(1); 5348 unsigned Opc1, Opc2, Opc3; 5349 EVT VT; 5350 5351 if (EltSize == 1) { 5352 Opc1 = PPC::VSPLTISB; 5353 Opc2 = PPC::VADDUBM; 5354 Opc3 = PPC::VSUBUBM; 5355 VT = MVT::v16i8; 5356 } else if (EltSize == 2) { 5357 Opc1 = PPC::VSPLTISH; 5358 Opc2 = PPC::VADDUHM; 5359 Opc3 = PPC::VSUBUHM; 5360 VT = MVT::v8i16; 5361 } else { 5362 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!"); 5363 Opc1 = PPC::VSPLTISW; 5364 Opc2 = PPC::VADDUWM; 5365 Opc3 = PPC::VSUBUWM; 5366 VT = MVT::v4i32; 5367 } 5368 5369 if ((Elt & 1) == 0) { 5370 // Elt is even, in the range [-32,-18] + [16,30]. 5371 // 5372 // Convert: VADD_SPLAT elt, size 5373 // Into: tmp = VSPLTIS[BHW] elt 5374 // VADDU[BHW]M tmp, tmp 5375 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4 5376 SDValue EltVal = getI32Imm(Elt >> 1, dl); 5377 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5378 SDValue TmpVal = SDValue(Tmp, 0); 5379 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal)); 5380 return; 5381 } else if (Elt > 0) { 5382 // Elt is odd and positive, in the range [17,31]. 5383 // 5384 // Convert: VADD_SPLAT elt, size 5385 // Into: tmp1 = VSPLTIS[BHW] elt-16 5386 // tmp2 = VSPLTIS[BHW] -16 5387 // VSUBU[BHW]M tmp1, tmp2 5388 SDValue EltVal = getI32Imm(Elt - 16, dl); 5389 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5390 EltVal = getI32Imm(-16, dl); 5391 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5392 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0), 5393 SDValue(Tmp2, 0))); 5394 return; 5395 } else { 5396 // Elt is odd and negative, in the range [-31,-17]. 5397 // 5398 // Convert: VADD_SPLAT elt, size 5399 // Into: tmp1 = VSPLTIS[BHW] elt+16 5400 // tmp2 = VSPLTIS[BHW] -16 5401 // VADDU[BHW]M tmp1, tmp2 5402 SDValue EltVal = getI32Imm(Elt + 16, dl); 5403 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5404 EltVal = getI32Imm(-16, dl); 5405 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal); 5406 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0), 5407 SDValue(Tmp2, 0))); 5408 return; 5409 } 5410 } 5411 } 5412 5413 SelectCode(N); 5414 } 5415 5416 // If the target supports the cmpb instruction, do the idiom recognition here. 5417 // We don't do this as a DAG combine because we don't want to do it as nodes 5418 // are being combined (because we might miss part of the eventual idiom). We 5419 // don't want to do it during instruction selection because we want to reuse 5420 // the logic for lowering the masking operations already part of the 5421 // instruction selector. 
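// For reference, cmpb compares its two register operands byte by byte and,
// for each byte, produces 0xFF when the bytes are equal and 0x00 when they
// differ. The recognizer below therefore looks for an OR of per-byte
// SELECT_CC nodes, each producing a byte-sized constant depending on whether
// the corresponding bytes of two common inputs are equal -- exactly the
// value a single CMPB, possibly followed by masking, computes.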
5422 SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) { 5423 SDLoc dl(N); 5424 5425 assert(N->getOpcode() == ISD::OR && 5426 "Only OR nodes are supported for CMPB"); 5427 5428 SDValue Res; 5429 if (!PPCSubTarget->hasCMPB()) 5430 return Res; 5431 5432 if (N->getValueType(0) != MVT::i32 && 5433 N->getValueType(0) != MVT::i64) 5434 return Res; 5435 5436 EVT VT = N->getValueType(0); 5437 5438 SDValue RHS, LHS; 5439 bool BytesFound[8] = {false, false, false, false, false, false, false, false}; 5440 uint64_t Mask = 0, Alt = 0; 5441 5442 auto IsByteSelectCC = [this](SDValue O, unsigned &b, 5443 uint64_t &Mask, uint64_t &Alt, 5444 SDValue &LHS, SDValue &RHS) { 5445 if (O.getOpcode() != ISD::SELECT_CC) 5446 return false; 5447 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get(); 5448 5449 if (!isa<ConstantSDNode>(O.getOperand(2)) || 5450 !isa<ConstantSDNode>(O.getOperand(3))) 5451 return false; 5452 5453 uint64_t PM = O.getConstantOperandVal(2); 5454 uint64_t PAlt = O.getConstantOperandVal(3); 5455 for (b = 0; b < 8; ++b) { 5456 uint64_t Mask = UINT64_C(0xFF) << (8*b); 5457 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt) 5458 break; 5459 } 5460 5461 if (b == 8) 5462 return false; 5463 Mask |= PM; 5464 Alt |= PAlt; 5465 5466 if (!isa<ConstantSDNode>(O.getOperand(1)) || 5467 O.getConstantOperandVal(1) != 0) { 5468 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1); 5469 if (Op0.getOpcode() == ISD::TRUNCATE) 5470 Op0 = Op0.getOperand(0); 5471 if (Op1.getOpcode() == ISD::TRUNCATE) 5472 Op1 = Op1.getOperand(0); 5473 5474 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL && 5475 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ && 5476 isa<ConstantSDNode>(Op0.getOperand(1))) { 5477 5478 unsigned Bits = Op0.getValueSizeInBits(); 5479 if (b != Bits/8-1) 5480 return false; 5481 if (Op0.getConstantOperandVal(1) != Bits-8) 5482 return false; 5483 5484 LHS = Op0.getOperand(0); 5485 RHS = Op1.getOperand(0); 5486 return true; 5487 } 5488 5489 // When we have small integers (i16 to be specific), the form present 5490 // post-legalization uses SETULT in the SELECT_CC for the 5491 // higher-order byte, depending on the fact that the 5492 // even-higher-order bytes are known to all be zero, for example: 5493 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult 5494 // (so when the second byte is the same, because all higher-order 5495 // bits from bytes 3 and 4 are known to be zero, the result of the 5496 // xor can be at most 255) 5497 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT && 5498 isa<ConstantSDNode>(O.getOperand(1))) { 5499 5500 uint64_t ULim = O.getConstantOperandVal(1); 5501 if (ULim != (UINT64_C(1) << b*8)) 5502 return false; 5503 5504 // Now we need to make sure that the upper bytes are known to be 5505 // zero. 
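// As an illustrative instance: with b == 1 on an i32 xor, Bits is 32 and
// the high 32 - 2*8 = 16 bits must be known zero, so the (xor u< 256)
// comparison above really does test only whether byte 1 of the two inputs
// is equal.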
5506 unsigned Bits = Op0.getValueSizeInBits(); 5507 if (!CurDAG->MaskedValueIsZero( 5508 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8))) 5509 return false; 5510 5511 LHS = Op0.getOperand(0); 5512 RHS = Op0.getOperand(1); 5513 return true; 5514 } 5515 5516 return false; 5517 } 5518 5519 if (CC != ISD::SETEQ) 5520 return false; 5521 5522 SDValue Op = O.getOperand(0); 5523 if (Op.getOpcode() == ISD::AND) { 5524 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5525 return false; 5526 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b))) 5527 return false; 5528 5529 SDValue XOR = Op.getOperand(0); 5530 if (XOR.getOpcode() == ISD::TRUNCATE) 5531 XOR = XOR.getOperand(0); 5532 if (XOR.getOpcode() != ISD::XOR) 5533 return false; 5534 5535 LHS = XOR.getOperand(0); 5536 RHS = XOR.getOperand(1); 5537 return true; 5538 } else if (Op.getOpcode() == ISD::SRL) { 5539 if (!isa<ConstantSDNode>(Op.getOperand(1))) 5540 return false; 5541 unsigned Bits = Op.getValueSizeInBits(); 5542 if (b != Bits/8-1) 5543 return false; 5544 if (Op.getConstantOperandVal(1) != Bits-8) 5545 return false; 5546 5547 SDValue XOR = Op.getOperand(0); 5548 if (XOR.getOpcode() == ISD::TRUNCATE) 5549 XOR = XOR.getOperand(0); 5550 if (XOR.getOpcode() != ISD::XOR) 5551 return false; 5552 5553 LHS = XOR.getOperand(0); 5554 RHS = XOR.getOperand(1); 5555 return true; 5556 } 5557 5558 return false; 5559 }; 5560 5561 SmallVector<SDValue, 8> Queue(1, SDValue(N, 0)); 5562 while (!Queue.empty()) { 5563 SDValue V = Queue.pop_back_val(); 5564 5565 for (const SDValue &O : V.getNode()->ops()) { 5566 unsigned b = 0; 5567 uint64_t M = 0, A = 0; 5568 SDValue OLHS, ORHS; 5569 if (O.getOpcode() == ISD::OR) { 5570 Queue.push_back(O); 5571 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) { 5572 if (!LHS) { 5573 LHS = OLHS; 5574 RHS = ORHS; 5575 BytesFound[b] = true; 5576 Mask |= M; 5577 Alt |= A; 5578 } else if ((LHS == ORHS && RHS == OLHS) || 5579 (RHS == ORHS && LHS == OLHS)) { 5580 BytesFound[b] = true; 5581 Mask |= M; 5582 Alt |= A; 5583 } else { 5584 return Res; 5585 } 5586 } else { 5587 return Res; 5588 } 5589 } 5590 } 5591 5592 unsigned LastB = 0, BCnt = 0; 5593 for (unsigned i = 0; i < 8; ++i) 5594 if (BytesFound[LastB]) { 5595 ++BCnt; 5596 LastB = i; 5597 } 5598 5599 if (!LastB || BCnt < 2) 5600 return Res; 5601 5602 // Because we'll be zero-extending the output anyway if don't have a specific 5603 // value for each input byte (via the Mask), we can 'anyext' the inputs. 5604 if (LHS.getValueType() != VT) { 5605 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT); 5606 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT); 5607 } 5608 5609 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS); 5610 5611 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1); 5612 if (NonTrivialMask && !Alt) { 5613 // Res = Mask & CMPB 5614 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5615 CurDAG->getConstant(Mask, dl, VT)); 5616 } else if (Alt) { 5617 // Res = (CMPB & Mask) | (~CMPB & Alt) 5618 // Which, as suggested here: 5619 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge 5620 // can be written as: 5621 // Res = Alt ^ ((Alt ^ Mask) & CMPB) 5622 // useful because the (Alt ^ Mask) can be pre-computed. 
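// Byte-wise sanity check (illustrative): where a CMPB byte is 0xFF,
// ((Alt ^ Mask) & 0xFF) ^ Alt simplifies to the Mask byte; where it is
// 0x00, it simplifies to the Alt byte -- i.e. exactly
// (CMPB & Mask) | (~CMPB & Alt) for every byte.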
5623 Res = CurDAG->getNode(ISD::AND, dl, VT, Res, 5624 CurDAG->getConstant(Mask ^ Alt, dl, VT)); 5625 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res, 5626 CurDAG->getConstant(Alt, dl, VT)); 5627 } 5628 5629 return Res; 5630 } 5631 5632 // When CR bit registers are enabled, an extension of an i1 variable to a i32 5633 // or i64 value is lowered in terms of a SELECT_I[48] operation, and thus 5634 // involves constant materialization of a 0 or a 1 or both. If the result of 5635 // the extension is then operated upon by some operator that can be constant 5636 // folded with a constant 0 or 1, and that constant can be materialized using 5637 // only one instruction (like a zero or one), then we should fold in those 5638 // operations with the select. 5639 void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) { 5640 if (!PPCSubTarget->useCRBits()) 5641 return; 5642 5643 if (N->getOpcode() != ISD::ZERO_EXTEND && 5644 N->getOpcode() != ISD::SIGN_EXTEND && 5645 N->getOpcode() != ISD::ANY_EXTEND) 5646 return; 5647 5648 if (N->getOperand(0).getValueType() != MVT::i1) 5649 return; 5650 5651 if (!N->hasOneUse()) 5652 return; 5653 5654 SDLoc dl(N); 5655 EVT VT = N->getValueType(0); 5656 SDValue Cond = N->getOperand(0); 5657 SDValue ConstTrue = 5658 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT); 5659 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT); 5660 5661 do { 5662 SDNode *User = *N->use_begin(); 5663 if (User->getNumOperands() != 2) 5664 break; 5665 5666 auto TryFold = [this, N, User, dl](SDValue Val) { 5667 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1); 5668 SDValue O0 = UserO0.getNode() == N ? Val : UserO0; 5669 SDValue O1 = UserO1.getNode() == N ? Val : UserO1; 5670 5671 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl, 5672 User->getValueType(0), {O0, O1}); 5673 }; 5674 5675 // FIXME: When the semantics of the interaction between select and undef 5676 // are clearly defined, it may turn out to be unnecessary to break here. 5677 SDValue TrueRes = TryFold(ConstTrue); 5678 if (!TrueRes || TrueRes.isUndef()) 5679 break; 5680 SDValue FalseRes = TryFold(ConstFalse); 5681 if (!FalseRes || FalseRes.isUndef()) 5682 break; 5683 5684 // For us to materialize these using one instruction, we must be able to 5685 // represent them as signed 16-bit integers. 5686 uint64_t True = cast<ConstantSDNode>(TrueRes)->getZExtValue(), 5687 False = cast<ConstantSDNode>(FalseRes)->getZExtValue(); 5688 if (!isInt<16>(True) || !isInt<16>(False)) 5689 break; 5690 5691 // We can replace User with a new SELECT node, and try again to see if we 5692 // can fold the select with its user. 
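// An illustrative run (not tied to any particular test case): for
// (add (zext i1 %c to i32), 1), TryFold yields the constants 2 and 1, so
// this builds (select %c, 2, 1), and the next iteration then tries to fold
// that select into the add's own single user.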
5693 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes); 5694 N = User; 5695 ConstTrue = TrueRes; 5696 ConstFalse = FalseRes; 5697 } while (N->hasOneUse()); 5698 } 5699 5700 void PPCDAGToDAGISel::PreprocessISelDAG() { 5701 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); 5702 5703 bool MadeChange = false; 5704 while (Position != CurDAG->allnodes_begin()) { 5705 SDNode *N = &*--Position; 5706 if (N->use_empty()) 5707 continue; 5708 5709 SDValue Res; 5710 switch (N->getOpcode()) { 5711 default: break; 5712 case ISD::OR: 5713 Res = combineToCMPB(N); 5714 break; 5715 } 5716 5717 if (!Res) 5718 foldBoolExts(Res, N); 5719 5720 if (Res) { 5721 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: "); 5722 LLVM_DEBUG(N->dump(CurDAG)); 5723 LLVM_DEBUG(dbgs() << "\nNew: "); 5724 LLVM_DEBUG(Res.getNode()->dump(CurDAG)); 5725 LLVM_DEBUG(dbgs() << "\n"); 5726 5727 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); 5728 MadeChange = true; 5729 } 5730 } 5731 5732 if (MadeChange) 5733 CurDAG->RemoveDeadNodes(); 5734 } 5735 5736 /// PostprocessISelDAG - Perform some late peephole optimizations 5737 /// on the DAG representation. 5738 void PPCDAGToDAGISel::PostprocessISelDAG() { 5739 // Skip peepholes at -O0. 5740 if (TM.getOptLevel() == CodeGenOpt::None) 5741 return; 5742 5743 PeepholePPC64(); 5744 PeepholeCROps(); 5745 PeepholePPC64ZExt(); 5746 } 5747 5748 // Check if all users of this node will become isel where the second operand 5749 // is the constant zero. If this is so, and if we can negate the condition, 5750 // then we can flip the true and false operands. This will allow the zero to 5751 // be folded with the isel so that we don't need to materialize a register 5752 // containing zero. 5753 bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) { 5754 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5755 UI != UE; ++UI) { 5756 SDNode *User = *UI; 5757 if (!User->isMachineOpcode()) 5758 return false; 5759 if (User->getMachineOpcode() != PPC::SELECT_I4 && 5760 User->getMachineOpcode() != PPC::SELECT_I8) 5761 return false; 5762 5763 SDNode *Op2 = User->getOperand(2).getNode(); 5764 if (!Op2->isMachineOpcode()) 5765 return false; 5766 5767 if (Op2->getMachineOpcode() != PPC::LI && 5768 Op2->getMachineOpcode() != PPC::LI8) 5769 return false; 5770 5771 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0)); 5772 if (!C) 5773 return false; 5774 5775 if (!C->isNullValue()) 5776 return false; 5777 } 5778 5779 return true; 5780 } 5781 5782 void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) { 5783 SmallVector<SDNode *, 4> ToReplace; 5784 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 5785 UI != UE; ++UI) { 5786 SDNode *User = *UI; 5787 assert((User->getMachineOpcode() == PPC::SELECT_I4 || 5788 User->getMachineOpcode() == PPC::SELECT_I8) && 5789 "Must have all select users"); 5790 ToReplace.push_back(User); 5791 } 5792 5793 for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(), 5794 UE = ToReplace.end(); UI != UE; ++UI) { 5795 SDNode *User = *UI; 5796 SDNode *ResNode = 5797 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User), 5798 User->getValueType(0), User->getOperand(0), 5799 User->getOperand(2), 5800 User->getOperand(1)); 5801 5802 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 5803 LLVM_DEBUG(User->dump(CurDAG)); 5804 LLVM_DEBUG(dbgs() << "\nNew: "); 5805 LLVM_DEBUG(ResNode->dump(CurDAG)); 5806 LLVM_DEBUG(dbgs() << "\n"); 5807 5808 ReplaceUses(User, ResNode); 5809 } 5810 } 5811 
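// Peephole-simplify CR-bit logic after instruction selection. Each pass over
// the DAG folds CR operations whose inputs are known set (crset), known
// unset (crunset), or a negation (crnor x, x) -- e.g. crand(x, 1) becomes x
// and crxor(x, x) becomes crunset -- and, when every user is a select of
// zero, rewrites the operation in inverted form so those selects can swap
// their true and false operands instead (see AllUsersSelectZero above).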
5812 void PPCDAGToDAGISel::PeepholeCROps() { 5813 bool IsModified; 5814 do { 5815 IsModified = false; 5816 for (SDNode &Node : CurDAG->allnodes()) { 5817 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node); 5818 if (!MachineNode || MachineNode->use_empty()) 5819 continue; 5820 SDNode *ResNode = MachineNode; 5821 5822 bool Op1Set = false, Op1Unset = false, 5823 Op1Not = false, 5824 Op2Set = false, Op2Unset = false, 5825 Op2Not = false; 5826 5827 unsigned Opcode = MachineNode->getMachineOpcode(); 5828 switch (Opcode) { 5829 default: break; 5830 case PPC::CRAND: 5831 case PPC::CRNAND: 5832 case PPC::CROR: 5833 case PPC::CRXOR: 5834 case PPC::CRNOR: 5835 case PPC::CREQV: 5836 case PPC::CRANDC: 5837 case PPC::CRORC: { 5838 SDValue Op = MachineNode->getOperand(1); 5839 if (Op.isMachineOpcode()) { 5840 if (Op.getMachineOpcode() == PPC::CRSET) 5841 Op2Set = true; 5842 else if (Op.getMachineOpcode() == PPC::CRUNSET) 5843 Op2Unset = true; 5844 else if (Op.getMachineOpcode() == PPC::CRNOR && 5845 Op.getOperand(0) == Op.getOperand(1)) 5846 Op2Not = true; 5847 } 5848 LLVM_FALLTHROUGH; 5849 } 5850 case PPC::BC: 5851 case PPC::BCn: 5852 case PPC::SELECT_I4: 5853 case PPC::SELECT_I8: 5854 case PPC::SELECT_F4: 5855 case PPC::SELECT_F8: 5856 case PPC::SELECT_QFRC: 5857 case PPC::SELECT_QSRC: 5858 case PPC::SELECT_QBRC: 5859 case PPC::SELECT_SPE: 5860 case PPC::SELECT_SPE4: 5861 case PPC::SELECT_VRRC: 5862 case PPC::SELECT_VSFRC: 5863 case PPC::SELECT_VSSRC: 5864 case PPC::SELECT_VSRC: { 5865 SDValue Op = MachineNode->getOperand(0); 5866 if (Op.isMachineOpcode()) { 5867 if (Op.getMachineOpcode() == PPC::CRSET) 5868 Op1Set = true; 5869 else if (Op.getMachineOpcode() == PPC::CRUNSET) 5870 Op1Unset = true; 5871 else if (Op.getMachineOpcode() == PPC::CRNOR && 5872 Op.getOperand(0) == Op.getOperand(1)) 5873 Op1Not = true; 5874 } 5875 } 5876 break; 5877 } 5878 5879 bool SelectSwap = false; 5880 switch (Opcode) { 5881 default: break; 5882 case PPC::CRAND: 5883 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5884 // x & x = x 5885 ResNode = MachineNode->getOperand(0).getNode(); 5886 else if (Op1Set) 5887 // 1 & y = y 5888 ResNode = MachineNode->getOperand(1).getNode(); 5889 else if (Op2Set) 5890 // x & 1 = x 5891 ResNode = MachineNode->getOperand(0).getNode(); 5892 else if (Op1Unset || Op2Unset) 5893 // x & 0 = 0 & y = 0 5894 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 5895 MVT::i1); 5896 else if (Op1Not) 5897 // ~x & y = andc(y, x) 5898 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 5899 MVT::i1, MachineNode->getOperand(1), 5900 MachineNode->getOperand(0). 5901 getOperand(0)); 5902 else if (Op2Not) 5903 // x & ~y = andc(x, y) 5904 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 5905 MVT::i1, MachineNode->getOperand(0), 5906 MachineNode->getOperand(1). 
5907 getOperand(0)); 5908 else if (AllUsersSelectZero(MachineNode)) { 5909 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 5910 MVT::i1, MachineNode->getOperand(0), 5911 MachineNode->getOperand(1)); 5912 SelectSwap = true; 5913 } 5914 break; 5915 case PPC::CRNAND: 5916 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5917 // nand(x, x) -> nor(x, x) 5918 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5919 MVT::i1, MachineNode->getOperand(0), 5920 MachineNode->getOperand(0)); 5921 else if (Op1Set) 5922 // nand(1, y) -> nor(y, y) 5923 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5924 MVT::i1, MachineNode->getOperand(1), 5925 MachineNode->getOperand(1)); 5926 else if (Op2Set) 5927 // nand(x, 1) -> nor(x, x) 5928 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5929 MVT::i1, MachineNode->getOperand(0), 5930 MachineNode->getOperand(0)); 5931 else if (Op1Unset || Op2Unset) 5932 // nand(x, 0) = nand(0, y) = 1 5933 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 5934 MVT::i1); 5935 else if (Op1Not) 5936 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y) 5937 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5938 MVT::i1, MachineNode->getOperand(0). 5939 getOperand(0), 5940 MachineNode->getOperand(1)); 5941 else if (Op2Not) 5942 // nand(x, ~y) = ~x | y = orc(y, x) 5943 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5944 MVT::i1, MachineNode->getOperand(1). 5945 getOperand(0), 5946 MachineNode->getOperand(0)); 5947 else if (AllUsersSelectZero(MachineNode)) { 5948 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 5949 MVT::i1, MachineNode->getOperand(0), 5950 MachineNode->getOperand(1)); 5951 SelectSwap = true; 5952 } 5953 break; 5954 case PPC::CROR: 5955 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5956 // x | x = x 5957 ResNode = MachineNode->getOperand(0).getNode(); 5958 else if (Op1Set || Op2Set) 5959 // x | 1 = 1 | y = 1 5960 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 5961 MVT::i1); 5962 else if (Op1Unset) 5963 // 0 | y = y 5964 ResNode = MachineNode->getOperand(1).getNode(); 5965 else if (Op2Unset) 5966 // x | 0 = x 5967 ResNode = MachineNode->getOperand(0).getNode(); 5968 else if (Op1Not) 5969 // ~x | y = orc(y, x) 5970 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5971 MVT::i1, MachineNode->getOperand(1), 5972 MachineNode->getOperand(0). 5973 getOperand(0)); 5974 else if (Op2Not) 5975 // x | ~y = orc(x, y) 5976 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 5977 MVT::i1, MachineNode->getOperand(0), 5978 MachineNode->getOperand(1). 
5979 getOperand(0)); 5980 else if (AllUsersSelectZero(MachineNode)) { 5981 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5982 MVT::i1, MachineNode->getOperand(0), 5983 MachineNode->getOperand(1)); 5984 SelectSwap = true; 5985 } 5986 break; 5987 case PPC::CRXOR: 5988 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 5989 // xor(x, x) = 0 5990 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 5991 MVT::i1); 5992 else if (Op1Set) 5993 // xor(1, y) -> nor(y, y) 5994 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 5995 MVT::i1, MachineNode->getOperand(1), 5996 MachineNode->getOperand(1)); 5997 else if (Op2Set) 5998 // xor(x, 1) -> nor(x, x) 5999 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6000 MVT::i1, MachineNode->getOperand(0), 6001 MachineNode->getOperand(0)); 6002 else if (Op1Unset) 6003 // xor(0, y) = y 6004 ResNode = MachineNode->getOperand(1).getNode(); 6005 else if (Op2Unset) 6006 // xor(x, 0) = x 6007 ResNode = MachineNode->getOperand(0).getNode(); 6008 else if (Op1Not) 6009 // xor(~x, y) = eqv(x, y) 6010 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 6011 MVT::i1, MachineNode->getOperand(0). 6012 getOperand(0), 6013 MachineNode->getOperand(1)); 6014 else if (Op2Not) 6015 // xor(x, ~y) = eqv(x, y) 6016 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 6017 MVT::i1, MachineNode->getOperand(0), 6018 MachineNode->getOperand(1). 6019 getOperand(0)); 6020 else if (AllUsersSelectZero(MachineNode)) { 6021 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode), 6022 MVT::i1, MachineNode->getOperand(0), 6023 MachineNode->getOperand(1)); 6024 SelectSwap = true; 6025 } 6026 break; 6027 case PPC::CRNOR: 6028 if (Op1Set || Op2Set) 6029 // nor(1, y) -> 0 6030 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 6031 MVT::i1); 6032 else if (Op1Unset) 6033 // nor(0, y) = ~y -> nor(y, y) 6034 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6035 MVT::i1, MachineNode->getOperand(1), 6036 MachineNode->getOperand(1)); 6037 else if (Op2Unset) 6038 // nor(x, 0) = ~x 6039 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6040 MVT::i1, MachineNode->getOperand(0), 6041 MachineNode->getOperand(0)); 6042 else if (Op1Not) 6043 // nor(~x, y) = andc(x, y) 6044 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 6045 MVT::i1, MachineNode->getOperand(0). 6046 getOperand(0), 6047 MachineNode->getOperand(1)); 6048 else if (Op2Not) 6049 // nor(x, ~y) = andc(y, x) 6050 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 6051 MVT::i1, MachineNode->getOperand(1). 
6052 getOperand(0), 6053 MachineNode->getOperand(0)); 6054 else if (AllUsersSelectZero(MachineNode)) { 6055 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 6056 MVT::i1, MachineNode->getOperand(0), 6057 MachineNode->getOperand(1)); 6058 SelectSwap = true; 6059 } 6060 break; 6061 case PPC::CREQV: 6062 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 6063 // eqv(x, x) = 1 6064 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 6065 MVT::i1); 6066 else if (Op1Set) 6067 // eqv(1, y) = y 6068 ResNode = MachineNode->getOperand(1).getNode(); 6069 else if (Op2Set) 6070 // eqv(x, 1) = x 6071 ResNode = MachineNode->getOperand(0).getNode(); 6072 else if (Op1Unset) 6073 // eqv(0, y) = ~y -> nor(y, y) 6074 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6075 MVT::i1, MachineNode->getOperand(1), 6076 MachineNode->getOperand(1)); 6077 else if (Op2Unset) 6078 // eqv(x, 0) = ~x 6079 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6080 MVT::i1, MachineNode->getOperand(0), 6081 MachineNode->getOperand(0)); 6082 else if (Op1Not) 6083 // eqv(~x, y) = xor(x, y) 6084 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 6085 MVT::i1, MachineNode->getOperand(0). 6086 getOperand(0), 6087 MachineNode->getOperand(1)); 6088 else if (Op2Not) 6089 // eqv(x, ~y) = xor(x, y) 6090 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 6091 MVT::i1, MachineNode->getOperand(0), 6092 MachineNode->getOperand(1). 6093 getOperand(0)); 6094 else if (AllUsersSelectZero(MachineNode)) { 6095 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode), 6096 MVT::i1, MachineNode->getOperand(0), 6097 MachineNode->getOperand(1)); 6098 SelectSwap = true; 6099 } 6100 break; 6101 case PPC::CRANDC: 6102 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 6103 // andc(x, x) = 0 6104 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 6105 MVT::i1); 6106 else if (Op1Set) 6107 // andc(1, y) = ~y 6108 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6109 MVT::i1, MachineNode->getOperand(1), 6110 MachineNode->getOperand(1)); 6111 else if (Op1Unset || Op2Set) 6112 // andc(0, y) = andc(x, 1) = 0 6113 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode), 6114 MVT::i1); 6115 else if (Op2Unset) 6116 // andc(x, 0) = x 6117 ResNode = MachineNode->getOperand(0).getNode(); 6118 else if (Op1Not) 6119 // andc(~x, y) = ~(x | y) = nor(x, y) 6120 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6121 MVT::i1, MachineNode->getOperand(0). 6122 getOperand(0), 6123 MachineNode->getOperand(1)); 6124 else if (Op2Not) 6125 // andc(x, ~y) = x & y 6126 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode), 6127 MVT::i1, MachineNode->getOperand(0), 6128 MachineNode->getOperand(1). 
6129 getOperand(0)); 6130 else if (AllUsersSelectZero(MachineNode)) { 6131 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode), 6132 MVT::i1, MachineNode->getOperand(1), 6133 MachineNode->getOperand(0)); 6134 SelectSwap = true; 6135 } 6136 break; 6137 case PPC::CRORC: 6138 if (MachineNode->getOperand(0) == MachineNode->getOperand(1)) 6139 // orc(x, x) = 1 6140 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 6141 MVT::i1); 6142 else if (Op1Set || Op2Unset) 6143 // orc(1, y) = orc(x, 0) = 1 6144 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode), 6145 MVT::i1); 6146 else if (Op2Set) 6147 // orc(x, 1) = x 6148 ResNode = MachineNode->getOperand(0).getNode(); 6149 else if (Op1Unset) 6150 // orc(0, y) = ~y 6151 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode), 6152 MVT::i1, MachineNode->getOperand(1), 6153 MachineNode->getOperand(1)); 6154 else if (Op1Not) 6155 // orc(~x, y) = ~(x & y) = nand(x, y) 6156 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode), 6157 MVT::i1, MachineNode->getOperand(0). 6158 getOperand(0), 6159 MachineNode->getOperand(1)); 6160 else if (Op2Not) 6161 // orc(x, ~y) = x | y 6162 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode), 6163 MVT::i1, MachineNode->getOperand(0), 6164 MachineNode->getOperand(1). 6165 getOperand(0)); 6166 else if (AllUsersSelectZero(MachineNode)) { 6167 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode), 6168 MVT::i1, MachineNode->getOperand(1), 6169 MachineNode->getOperand(0)); 6170 SelectSwap = true; 6171 } 6172 break; 6173 case PPC::SELECT_I4: 6174 case PPC::SELECT_I8: 6175 case PPC::SELECT_F4: 6176 case PPC::SELECT_F8: 6177 case PPC::SELECT_QFRC: 6178 case PPC::SELECT_QSRC: 6179 case PPC::SELECT_QBRC: 6180 case PPC::SELECT_SPE: 6181 case PPC::SELECT_SPE4: 6182 case PPC::SELECT_VRRC: 6183 case PPC::SELECT_VSFRC: 6184 case PPC::SELECT_VSSRC: 6185 case PPC::SELECT_VSRC: 6186 if (Op1Set) 6187 ResNode = MachineNode->getOperand(1).getNode(); 6188 else if (Op1Unset) 6189 ResNode = MachineNode->getOperand(2).getNode(); 6190 else if (Op1Not) 6191 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(), 6192 SDLoc(MachineNode), 6193 MachineNode->getValueType(0), 6194 MachineNode->getOperand(0). 6195 getOperand(0), 6196 MachineNode->getOperand(2), 6197 MachineNode->getOperand(1)); 6198 break; 6199 case PPC::BC: 6200 case PPC::BCn: 6201 if (Op1Not) 6202 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn : 6203 PPC::BC, 6204 SDLoc(MachineNode), 6205 MVT::Other, 6206 MachineNode->getOperand(0). 6207 getOperand(0), 6208 MachineNode->getOperand(1), 6209 MachineNode->getOperand(2)); 6210 // FIXME: Handle Op1Set, Op1Unset here too. 6211 break; 6212 } 6213 6214 // If we're inverting this node because it is used only by selects that 6215 // we'd like to swap, then swap the selects before the node replacement. 6216 if (SelectSwap) 6217 SwapAllSelectUsers(MachineNode); 6218 6219 if (ResNode != MachineNode) { 6220 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: "); 6221 LLVM_DEBUG(MachineNode->dump(CurDAG)); 6222 LLVM_DEBUG(dbgs() << "\nNew: "); 6223 LLVM_DEBUG(ResNode->dump(CurDAG)); 6224 LLVM_DEBUG(dbgs() << "\n"); 6225 6226 ReplaceUses(MachineNode, ResNode); 6227 IsModified = true; 6228 } 6229 } 6230 if (IsModified) 6231 CurDAG->RemoveDeadNodes(); 6232 } while (IsModified); 6233 } 6234 6235 // Gather the set of 32-bit operations that are known to have their 6236 // higher-order 32 bits zero, where ToPromote contains all such operations. 
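// For example (an illustrative chain only): given
//   (RLDICL (INSERT_SUBREG (IMPLICIT_DEF), (OR (SRW a, b), (LI 1)), sub_32),
//           0, 32)
// both OR operands already produce zero high bits -- SRW clears them and the
// LI immediate is small and positive -- so the OR subtree can be promoted to
// the 64-bit opcodes and the caller below can drop the RLDICL entirely.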
6237 static bool PeepholePPC64ZExtGather(SDValue Op32, 6238 SmallPtrSetImpl<SDNode *> &ToPromote) { 6239 if (!Op32.isMachineOpcode()) 6240 return false; 6241 6242 // First, check for the "frontier" instructions (those that will clear the 6243 // higher-order 32 bits. 6244 6245 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap 6246 // around. If it does not, then these instructions will clear the 6247 // higher-order bits. 6248 if ((Op32.getMachineOpcode() == PPC::RLWINM || 6249 Op32.getMachineOpcode() == PPC::RLWNM) && 6250 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) { 6251 ToPromote.insert(Op32.getNode()); 6252 return true; 6253 } 6254 6255 // SLW and SRW always clear the higher-order bits. 6256 if (Op32.getMachineOpcode() == PPC::SLW || 6257 Op32.getMachineOpcode() == PPC::SRW) { 6258 ToPromote.insert(Op32.getNode()); 6259 return true; 6260 } 6261 6262 // For LI and LIS, we need the immediate to be positive (so that it is not 6263 // sign extended). 6264 if (Op32.getMachineOpcode() == PPC::LI || 6265 Op32.getMachineOpcode() == PPC::LIS) { 6266 if (!isUInt<15>(Op32.getConstantOperandVal(0))) 6267 return false; 6268 6269 ToPromote.insert(Op32.getNode()); 6270 return true; 6271 } 6272 6273 // LHBRX and LWBRX always clear the higher-order bits. 6274 if (Op32.getMachineOpcode() == PPC::LHBRX || 6275 Op32.getMachineOpcode() == PPC::LWBRX) { 6276 ToPromote.insert(Op32.getNode()); 6277 return true; 6278 } 6279 6280 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended. 6281 if (Op32.getMachineOpcode() == PPC::CNTLZW || 6282 Op32.getMachineOpcode() == PPC::CNTTZW) { 6283 ToPromote.insert(Op32.getNode()); 6284 return true; 6285 } 6286 6287 // Next, check for those instructions we can look through. 6288 6289 // Assuming the mask does not wrap around, then the higher-order bits are 6290 // taken directly from the first operand. 6291 if (Op32.getMachineOpcode() == PPC::RLWIMI && 6292 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) { 6293 SmallPtrSet<SDNode *, 16> ToPromote1; 6294 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1)) 6295 return false; 6296 6297 ToPromote.insert(Op32.getNode()); 6298 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 6299 return true; 6300 } 6301 6302 // For OR, the higher-order bits are zero if that is true for both operands. 6303 // For SELECT_I4, the same is true (but the relevant operand numbers are 6304 // shifted by 1). 6305 if (Op32.getMachineOpcode() == PPC::OR || 6306 Op32.getMachineOpcode() == PPC::SELECT_I4) { 6307 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0; 6308 SmallPtrSet<SDNode *, 16> ToPromote1; 6309 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1)) 6310 return false; 6311 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1)) 6312 return false; 6313 6314 ToPromote.insert(Op32.getNode()); 6315 ToPromote.insert(ToPromote1.begin(), ToPromote1.end()); 6316 return true; 6317 } 6318 6319 // For ORI and ORIS, we need the higher-order bits of the first operand to be 6320 // zero, and also for the constant to be positive (so that it is not sign 6321 // extended). 
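// Illustrative case: (ORI (SRW a, b), 7) keeps zero high bits, since SRW is
// one of the frontier instructions above and the ori immediate only touches
// the low 16 bits; the isUInt<15> check below conservatively restricts the
// immediate to small positive values, per the comment above.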
  if (Op32.getMachineOpcode() == PPC::ORI ||
      Op32.getMachineOpcode() == PPC::ORIS) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
      return false;
    if (!isUInt<15>(Op32.getConstantOperandVal(1)))
      return false;

    ToPromote.insert(Op32.getNode());
    ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
    return true;
  }

  // The higher-order bits of AND are zero if that is true for at least one of
  // the operands.
  if (Op32.getMachineOpcode() == PPC::AND) {
    SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK =
      PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    if (Op1OK)
      ToPromote.insert(ToPromote2.begin(), ToPromote2.end());

    return true;
  }

  // For ANDI and ANDIS, the higher-order bits are zero if that is true of the
  // first operand, or if the second operand is positive (so that it is not
  // sign extended).
  if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
      Op32.getMachineOpcode() == PPC::ANDIS_rec) {
    SmallPtrSet<SDNode *, 16> ToPromote1;
    bool Op0OK =
      PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
    bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
    if (!Op0OK && !Op1OK)
      return false;

    ToPromote.insert(Op32.getNode());

    if (Op0OK)
      ToPromote.insert(ToPromote1.begin(), ToPromote1.end());

    return true;
  }

  return false;
}

void PPCDAGToDAGISel::PeepholePPC64ZExt() {
  if (!PPCSubTarget->isPPC64())
    return;

  // When we zero-extend from i32 to i64, we use a pattern like this:
  // def : Pat<(i64 (zext i32:$in)),
  //           (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
  //                   0, 32)>;
  // There are several 32-bit shift/rotate instructions, however, that will
  // clear the higher-order bits of their output, rendering the RLDICL
  // unnecessary. When that happens, we remove it here, and redefine the
  // relevant 32-bit operation to be a 64-bit operation.

  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  bool MadeChange = false;
  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    if (N->getMachineOpcode() != PPC::RLDICL)
      continue;

    if (N->getConstantOperandVal(1) != 0 ||
        N->getConstantOperandVal(2) != 32)
      continue;

    SDValue ISR = N->getOperand(0);
    if (!ISR.isMachineOpcode() ||
        ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
      continue;

    if (!ISR.hasOneUse())
      continue;

    if (ISR.getConstantOperandVal(2) != PPC::sub_32)
      continue;

    SDValue IDef = ISR.getOperand(0);
    if (!IDef.isMachineOpcode() ||
        IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
      continue;

    // We now know that we're looking at a canonical i32 -> i64 zext. See if we
    // can get rid of it.

    SDValue Op32 = ISR->getOperand(1);
    if (!Op32.isMachineOpcode())
      continue;

    // There are some 32-bit instructions that always clear the high-order 32
    // bits; there are also some instructions (like AND) that we can look
    // through.
    SmallPtrSet<SDNode *, 16> ToPromote;
    if (!PeepholePPC64ZExtGather(Op32, ToPromote))
      continue;

    // If the ToPromote set contains nodes that have uses outside of the set
    // (except for the original INSERT_SUBREG), then abort the transformation.
    bool OutsideUse = false;
    for (SDNode *PN : ToPromote) {
      for (SDNode *UN : PN->uses()) {
        if (!ToPromote.count(UN) && UN != ISR.getNode()) {
          OutsideUse = true;
          break;
        }
      }

      if (OutsideUse)
        break;
    }
    if (OutsideUse)
      continue;

    MadeChange = true;

    // We now know that this zero extension can be removed by promoting the
    // nodes in ToPromote to 64-bit operations, where, for operations in the
    // frontier of the set, we need to insert INSERT_SUBREGs for their
    // operands.
    for (SDNode *PN : ToPromote) {
      unsigned NewOpcode;
      switch (PN->getMachineOpcode()) {
      default:
        llvm_unreachable("Don't know the 64-bit variant of this instruction");
      case PPC::RLWINM:    NewOpcode = PPC::RLWINM8; break;
      case PPC::RLWNM:     NewOpcode = PPC::RLWNM8; break;
      case PPC::SLW:       NewOpcode = PPC::SLW8; break;
      case PPC::SRW:       NewOpcode = PPC::SRW8; break;
      case PPC::LI:        NewOpcode = PPC::LI8; break;
      case PPC::LIS:       NewOpcode = PPC::LIS8; break;
      case PPC::LHBRX:     NewOpcode = PPC::LHBRX8; break;
      case PPC::LWBRX:     NewOpcode = PPC::LWBRX8; break;
      case PPC::CNTLZW:    NewOpcode = PPC::CNTLZW8; break;
      case PPC::CNTTZW:    NewOpcode = PPC::CNTTZW8; break;
      case PPC::RLWIMI:    NewOpcode = PPC::RLWIMI8; break;
      case PPC::OR:        NewOpcode = PPC::OR8; break;
      case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
      case PPC::ORI:       NewOpcode = PPC::ORI8; break;
      case PPC::ORIS:      NewOpcode = PPC::ORIS8; break;
      case PPC::AND:       NewOpcode = PPC::AND8; break;
      case PPC::ANDI_rec:
        NewOpcode = PPC::ANDI8_rec;
        break;
      case PPC::ANDIS_rec:
        NewOpcode = PPC::ANDIS8_rec;
        break;
      }

      // Note: During the replacement process, the nodes will be in an
      // inconsistent state (some instructions will have operands with values
      // of the wrong type). Once done, however, everything should be right
      // again.

      SmallVector<SDValue, 4> Ops;
      for (const SDValue &V : PN->ops()) {
        if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
            !isa<ConstantSDNode>(V)) {
          SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
          SDNode *ReplOp =
            CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
                                   ISR.getNode()->getVTList(), ReplOpOps);
          Ops.push_back(SDValue(ReplOp, 0));
        } else {
          Ops.push_back(V);
        }
      }

      // Because all to-be-promoted nodes only have users that are other
      // promoted nodes (or the original INSERT_SUBREG), we can safely replace
      // the i32 result value type with i64.
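      //
      // As a sketch (illustrative only): a gathered (SRW %a, %b) : i32 is
      // morphed below into (SRW8 ...) : i64, and any non-constant operand
      // that is not itself being promoted has already been wrapped above as
      //   (INSERT_SUBREG (IMPLICIT_DEF), %op, sub_32)
      // so that its type matches the 64-bit instruction.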

      SmallVector<EVT, 2> NewVTs;
      SDVTList VTs = PN->getVTList();
      for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
        if (VTs.VTs[i] == MVT::i32)
          NewVTs.push_back(MVT::i64);
        else
          NewVTs.push_back(VTs.VTs[i]);

      LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
      LLVM_DEBUG(PN->dump(CurDAG));

      CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);

      LLVM_DEBUG(dbgs() << "\nNew: ");
      LLVM_DEBUG(PN->dump(CurDAG));
      LLVM_DEBUG(dbgs() << "\n");
    }

    // Now we replace the original zero extend and its associated INSERT_SUBREG
    // with the value feeding the INSERT_SUBREG (which has now been promoted to
    // return an i64).

    LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nNew: ");
    LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    ReplaceUses(N, Op32.getNode());
  }

  if (MadeChange)
    CurDAG->RemoveDeadNodes();
}

void PPCDAGToDAGISel::PeepholePPC64() {
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

  while (Position != CurDAG->allnodes_begin()) {
    SDNode *N = &*--Position;
    // Skip dead nodes and any non-machine opcodes.
    if (N->use_empty() || !N->isMachineOpcode())
      continue;

    unsigned FirstOp;
    unsigned StorageOpcode = N->getMachineOpcode();
    bool RequiresMod4Offset = false;

    switch (StorageOpcode) {
    default: continue;

    case PPC::LWA:
    case PPC::LD:
    case PPC::DFLOADf64:
    case PPC::DFLOADf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::LBZ:
    case PPC::LBZ8:
    case PPC::LFD:
    case PPC::LFS:
    case PPC::LHA:
    case PPC::LHA8:
    case PPC::LHZ:
    case PPC::LHZ8:
    case PPC::LWZ:
    case PPC::LWZ8:
      FirstOp = 0;
      break;

    case PPC::STD:
    case PPC::DFSTOREf64:
    case PPC::DFSTOREf32:
      RequiresMod4Offset = true;
      LLVM_FALLTHROUGH;
    case PPC::STB:
    case PPC::STB8:
    case PPC::STFD:
    case PPC::STFS:
    case PPC::STH:
    case PPC::STH8:
    case PPC::STW:
    case PPC::STW8:
      FirstOp = 1;
      break;
    }

    // If this is a load or store with a zero offset, or within the alignment,
    // we may be able to fold an add-immediate into the memory operation.
    // The check against alignment is below, as it can't occur until we check
    // the arguments to N.
    if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
      continue;

    SDValue Base = N->getOperand(FirstOp + 1);
    if (!Base.isMachineOpcode())
      continue;

    unsigned Flags = 0;
    bool ReplaceFlags = true;

    // When the feeding operation is an add-immediate of some sort,
    // determine whether we need to add relocation information to the
    // target flags on the immediate operand when we fold it into the
    // load instruction.
    //
    // For something like ADDItocL, the relocation information is
    // inferred from the opcode; when we process it in the AsmPrinter,
    // we add the necessary relocation there. A load, though, can receive
    // relocation from various flavors of ADDIxxx, so we need to carry
    // the relocation information in the target flags.
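    //
    // As an illustrative sketch (not from a particular test case), a TOC
    // access selected as
    //   %base = ADDItocL %ha, @g      ; %ha produced by ADDIStocHA8
    //   %val  = LD 0, %base
    // can be folded below into
    //   %val  = LD @g@toc@l, %ha
    // with the @toc@l relocation carried on the folded immediate operand.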
    switch (Base.getMachineOpcode()) {
    default: continue;

    case PPC::ADDI8:
    case PPC::ADDI:
      // In some cases (such as TLS) the relocation information
      // is already in place on the operand, so copying the operand
      // is sufficient.
      ReplaceFlags = false;
      // For these cases, the immediate may not be divisible by 4, in
      // which case the fold is illegal for DS-form instructions. (The
      // other cases provide aligned addresses and are always safe.)
      if (RequiresMod4Offset &&
          (!isa<ConstantSDNode>(Base.getOperand(1)) ||
           Base.getConstantOperandVal(1) % 4 != 0))
        continue;
      break;
    case PPC::ADDIdtprelL:
      Flags = PPCII::MO_DTPREL_LO;
      break;
    case PPC::ADDItlsldL:
      Flags = PPCII::MO_TLSLD_LO;
      break;
    case PPC::ADDItocL:
      Flags = PPCII::MO_TOC_LO;
      break;
    }

    SDValue ImmOpnd = Base.getOperand(1);

    // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
    // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
    // we might have needed different @ha relocation values for the offset
    // pointers).
    int MaxDisplacement = 7;
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
      const GlobalValue *GV = GA->getGlobal();
      MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
    }

    bool UpdateHBase = false;
    SDValue HBase = Base.getOperand(0);

    int Offset = N->getConstantOperandVal(FirstOp);
    if (ReplaceFlags) {
      if (Offset < 0 || Offset > MaxDisplacement) {
        // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
        // one use, then we can do this for any offset; we just need to also
        // update the offset (i.e. the symbol addend) on the addis.
        if (Base.getMachineOpcode() != PPC::ADDItocL)
          continue;

        if (!HBase.isMachineOpcode() ||
            HBase.getMachineOpcode() != PPC::ADDIStocHA8)
          continue;

        if (!Base.hasOneUse() || !HBase.hasOneUse())
          continue;

        SDValue HImmOpnd = HBase.getOperand(1);
        if (HImmOpnd != ImmOpnd)
          continue;

        UpdateHBase = true;
      }
    } else {
      // If we're directly folding the addend from an addi instruction, then:
      //  1. In general, the offset on the memory access must be zero.
      //  2. If the addend is a constant, then it can be combined with a
      //     non-zero offset, but only if the result meets the encoding
      //     requirements.
      if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
        Offset += C->getSExtValue();

        if (RequiresMod4Offset && (Offset % 4) != 0)
          continue;

        if (!isInt<16>(Offset))
          continue;

        ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
                                            ImmOpnd.getValueType());
      } else if (Offset != 0) {
        continue;
      }
    }

    // We found an opportunity. Reverse the operands from the add
    // immediate and substitute them into the load or store. If
    // needed, update the target flags for the immediate operand to
    // reflect the necessary relocation information.
    LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
    LLVM_DEBUG(Base->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\nN: ");
    LLVM_DEBUG(N->dump(CurDAG));
    LLVM_DEBUG(dbgs() << "\n");

    // If the relocation information isn't already present on the
    // immediate operand, add it now.
    if (ReplaceFlags) {
      if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
        SDLoc dl(GA);
        const GlobalValue *GV = GA->getGlobal();
        // We can't perform this optimization for data whose alignment
        // is insufficient for the instruction encoding.
        if (GV->getAlignment() < 4 &&
            (RequiresMod4Offset || (Offset % 4) != 0)) {
          LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
          continue;
        }
        ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset,
                                                 Flags);
      } else if (ConstantPoolSDNode *CP =
                   dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
        const Constant *C = CP->getConstVal();
        ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
                                                Offset, Flags);
      }
    }

    if (FirstOp == 1) // Store
      (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
                                       Base.getOperand(0), N->getOperand(3));
    else // Load
      (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
                                       N->getOperand(2));

    if (UpdateHBase)
      (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
                                       ImmOpnd);

    // The add-immediate may now be dead, in which case remove it.
    if (Base.getNode()->use_empty())
      CurDAG->RemoveDeadNode(Base.getNode());
  }
}

/// createPPCISelDag - This pass converts a legalized DAG into a
/// PowerPC-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new PPCDAGToDAGISel(TM, OptLevel);
}